spark git commit: [SPARK-25469][SQL] Eval methods of Concat, Reverse and ElementAt should use pattern matching only once
Repository: spark Updated Branches: refs/heads/master 411ecc365 -> 2c9d8f56c [SPARK-25469][SQL] Eval methods of Concat, Reverse and ElementAt should use pattern matching only once ## What changes were proposed in this pull request? The PR proposes to avoid usage of pattern matching for each call of ```eval``` method within: - ```Concat``` - ```Reverse``` - ```ElementAt``` ## How was this patch tested? Run the existing tests for ```Concat```, ```Reverse``` and ```ElementAt``` expression classes. Closes #22471 from mn-mikke/SPARK-25470. Authored-by: Marek Novotny Signed-off-by: Takeshi Yamamuro Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c9d8f56 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c9d8f56 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c9d8f56 Branch: refs/heads/master Commit: 2c9d8f56c71093faf152ca7136c5fcc4a7b2a95f Parents: 411ecc3 Author: Marek Novotny Authored: Fri Sep 21 18:16:54 2018 +0900 Committer: Takeshi Yamamuro Committed: Fri Sep 21 18:16:54 2018 +0900 -- .../expressions/collectionOperations.scala | 81 1 file changed, 48 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2c9d8f56/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index e23ebef..161adc9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1268,11 +1268,15 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI override def dataType: DataType = child.dataType - @transient private lazy val elementType: DataType = 
dataType.asInstanceOf[ArrayType].elementType + override def nullSafeEval(input: Any): Any = doReverse(input) - override def nullSafeEval(input: Any): Any = input match { -case a: ArrayData => new GenericArrayData(a.toObjectArray(elementType).reverse) -case s: UTF8String => s.reverse() + @transient private lazy val doReverse: Any => Any = dataType match { +case ArrayType(elementType, _) => + input => { +val arrayData = input.asInstanceOf[ArrayData] +new GenericArrayData(arrayData.toObjectArray(elementType).reverse) + } +case StringType => _.asInstanceOf[UTF8String].reverse() } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -1294,6 +1298,7 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI val i = ctx.freshName("i") val j = ctx.freshName("j") +val elementType = dataType.asInstanceOf[ArrayType].elementType val initialization = CodeGenerator.createArrayData( arrayData, elementType, numElements, s" $prettyName failed.") val assignment = CodeGenerator.createArrayAssignment( @@ -2164,9 +2169,11 @@ case class ElementAt(left: Expression, right: Expression) extends GetMapValueUti override def nullable: Boolean = true - override def nullSafeEval(value: Any, ordinal: Any): Any = { -left.dataType match { - case _: ArrayType => + override def nullSafeEval(value: Any, ordinal: Any): Any = doElementAt(value, ordinal) + + @transient private lazy val doElementAt: (Any, Any) => Any = left.dataType match { +case _: ArrayType => + (value, ordinal) => { val array = value.asInstanceOf[ArrayData] val index = ordinal.asInstanceOf[Int] if (array.numElements() < math.abs(index)) { @@ -2185,9 +2192,9 @@ case class ElementAt(left: Expression, right: Expression) extends GetMapValueUti array.get(idx, dataType) } } - case _: MapType => -getValueEval(value, ordinal, mapKeyType, ordering) -} + } +case _: MapType => + (value, ordinal) => getValueEval(value, ordinal, mapKeyType, ordering) } override def doGenCode(ctx: CodegenContext, ev: 
ExprCode): ExprCode = { @@ -2278,33 +2285,41 @@ case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpressio override def foldable: Boolean = children.forall(_.foldable) - override def eval(input: InternalRow): Any = dataType match { + override def eval(input: InternalRow): Any = doConcat(input) + + @transient private lazy val doConcat: InternalRow => Any = dataType match { case Binary
spark git commit: [SPARK-25469][SQL] Eval methods of Concat, Reverse and ElementAt should use pattern matching only once
Repository: spark Updated Branches: refs/heads/branch-2.4 e42546259 -> 604828eda [SPARK-25469][SQL] Eval methods of Concat, Reverse and ElementAt should use pattern matching only once ## What changes were proposed in this pull request? The PR proposes to avoid usage of pattern matching for each call of ```eval``` method within: - ```Concat``` - ```Reverse``` - ```ElementAt``` ## How was this patch tested? Run the existing tests for ```Concat```, ```Reverse``` and ```ElementAt``` expression classes. Closes #22471 from mn-mikke/SPARK-25470. Authored-by: Marek Novotny Signed-off-by: Takeshi Yamamuro (cherry picked from commit 2c9d8f56c71093faf152ca7136c5fcc4a7b2a95f) Signed-off-by: Takeshi Yamamuro Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/604828ed Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/604828ed Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/604828ed Branch: refs/heads/branch-2.4 Commit: 604828eda0930b933be39d5db7bdb1b29d499f32 Parents: e425462 Author: Marek Novotny Authored: Fri Sep 21 18:16:54 2018 +0900 Committer: Takeshi Yamamuro Committed: Fri Sep 21 18:30:32 2018 +0900 -- .../expressions/collectionOperations.scala | 81 1 file changed, 48 insertions(+), 33 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/604828ed/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index e23ebef..161adc9 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -1268,11 +1268,15 @@ case class Reverse(child: Expression) extends UnaryExpression with 
ImplicitCastI override def dataType: DataType = child.dataType - @transient private lazy val elementType: DataType = dataType.asInstanceOf[ArrayType].elementType + override def nullSafeEval(input: Any): Any = doReverse(input) - override def nullSafeEval(input: Any): Any = input match { -case a: ArrayData => new GenericArrayData(a.toObjectArray(elementType).reverse) -case s: UTF8String => s.reverse() + @transient private lazy val doReverse: Any => Any = dataType match { +case ArrayType(elementType, _) => + input => { +val arrayData = input.asInstanceOf[ArrayData] +new GenericArrayData(arrayData.toObjectArray(elementType).reverse) + } +case StringType => _.asInstanceOf[UTF8String].reverse() } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -1294,6 +1298,7 @@ case class Reverse(child: Expression) extends UnaryExpression with ImplicitCastI val i = ctx.freshName("i") val j = ctx.freshName("j") +val elementType = dataType.asInstanceOf[ArrayType].elementType val initialization = CodeGenerator.createArrayData( arrayData, elementType, numElements, s" $prettyName failed.") val assignment = CodeGenerator.createArrayAssignment( @@ -2164,9 +2169,11 @@ case class ElementAt(left: Expression, right: Expression) extends GetMapValueUti override def nullable: Boolean = true - override def nullSafeEval(value: Any, ordinal: Any): Any = { -left.dataType match { - case _: ArrayType => + override def nullSafeEval(value: Any, ordinal: Any): Any = doElementAt(value, ordinal) + + @transient private lazy val doElementAt: (Any, Any) => Any = left.dataType match { +case _: ArrayType => + (value, ordinal) => { val array = value.asInstanceOf[ArrayData] val index = ordinal.asInstanceOf[Int] if (array.numElements() < math.abs(index)) { @@ -2185,9 +2192,9 @@ case class ElementAt(left: Expression, right: Expression) extends GetMapValueUti array.get(idx, dataType) } } - case _: MapType => -getValueEval(value, ordinal, mapKeyType, ordering) -} + } +case _: MapType => + 
(value, ordinal) => getValueEval(value, ordinal, mapKeyType, ordering) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { @@ -2278,33 +2285,41 @@ case class Concat(children: Seq[Expression]) extends ComplexTypeMergingExpressio override def foldable: Boolean = children.forall(_.foldable) - override def eval(input: InternalRow): Any = dataType match { + override def eval(input: InternalRow): Any = doConcat(inpu
[spark] 01/01: Preparing Spark release v2.3.3-rc1
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to tag v2.3.3-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git commit b5ea9330e3072e99841270b10dc1d2248127064b Author: Takeshi Yamamuro AuthorDate: Wed Jan 16 13:21:25 2019 + Preparing Spark release v2.3.3-rc1 --- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 40 files changed, 40 insertions(+), 40 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index f8b15cc..6a8cd4f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index e412a47..6010b6e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ 
-22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index d8f9a3d..8b5d3c8 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index a1a4f87..dd27a24 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index e650978..aded5e7d 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 350e3cb..a50f612 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index e7fea41..8112ca4 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 601cc5d..0d5f61f 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 2a7e644..930128d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 7629f5f..8e9c3b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,7 +14,7 @@ include: # These allow
[spark] tag v2.3.3-rc1 deleted (was 2e01a70)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to tag v2.3.3-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git. *** WARNING: tag v2.3.3-rc1 was deleted! *** was 2e01a70 Preparing Spark release v2.3.3-rc1 The revisions that were on this tag are still contained in other references; therefore, this change does not discard any commits from the repository. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing development version 2.3.4-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 8319ba736c8909aa944e28d1c8de501926e9f50f Author: Takeshi Yamamuro AuthorDate: Wed Jan 16 13:22:01 2019 + Preparing development version 2.3.4-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 6ec4966..a82446e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.3.3 +Version: 2.3.4 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index 6a8cd4f..612a1b8 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 6010b6e..5547e97 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 8b5d3c8..119dde2 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index dd27a24..dba5224 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index aded5e7d..56902a3 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index a50f612..5302d95 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 8112ca4..232ebfa 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0d5f61f..f0baa2a 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache
[spark] branch branch-2.3 updated (18c138b -> 8319ba7)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git. from 18c138b Revert "[SPARK-26576][SQL] Broadcast hint not applied to partitioned table" add b5ea933 Preparing Spark release v2.3.3-rc1 new 8319ba7 Preparing development version 2.3.4-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.3 updated (1979712 -> 18c138b)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git. omit 1979712 [SPARK-26120][TESTS][SS][SPARKR] Fix a streaming query leak in Structured Streaming R tests omit 3137dca Preparing development version 2.3.4-SNAPSHOT omit 2e01a70 Preparing Spark release v2.3.3-rc1 new 2a82295 [SPARK-26120][TESTS][SS][SPARKR] Fix a streaming query leak in Structured Streaming R tests new 18c138b Revert "[SPARK-26576][SQL] Broadcast hint not applied to partitioned table" This update added new revisions after undoing existing revisions. That is to say, some revisions that were in the old version of the branch are not in the new version. This situation occurs when a user --force pushes a change and generates a repository containing something like this: * -- * -- B -- O -- O -- O (1979712) \ N -- N -- N refs/heads/branch-2.3 (18c138b) You should already have received notification emails for all of the O revisions, and so the following emails describe only the N revisions from the common base, B. Any revisions marked "omit" are not gone; other references still refer to them. Any revisions marked "discard" are gone forever. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. 
Summary of changes: R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- .../apache/spark/sql/catalyst/planning/patterns.scala | 3 +++ sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- .../execution/PruneFileSourcePartitionsSuite.scala| 19 +-- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 43 files changed, 46 insertions(+), 60 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 02/02: Revert "[SPARK-26576][SQL] Broadcast hint not applied to partitioned table"
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 18c138bf01cd43edc6db7115b90c4e7ae7126392 Author: Takeshi Yamamuro AuthorDate: Wed Jan 16 21:56:39 2019 +0900 Revert "[SPARK-26576][SQL] Broadcast hint not applied to partitioned table" This reverts commit 87c2c11e742a8b35699f68ec2002f817c56bef87. --- .../apache/spark/sql/catalyst/planning/patterns.scala | 3 +++ .../execution/PruneFileSourcePartitionsSuite.scala| 19 +-- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala index a91063b..cc391aa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/planning/patterns.scala @@ -65,6 +65,9 @@ object PhysicalOperation extends PredicateHelper { val substitutedCondition = substitute(aliases)(condition) (fields, filters ++ splitConjunctivePredicates(substitutedCondition), other, aliases) + case h: ResolvedHint => +collectProjectsAndFilters(h.child) + case other => (None, Nil, other, Map.empty) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala index 8a9adf7..9438418 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/PruneFileSourcePartitionsSuite.scala @@ -17,20 +17,15 @@ package org.apache.spark.sql.hive.execution -import org.scalatest.Matchers._ - import org.apache.spark.sql.QueryTest import org.apache.spark.sql.catalyst.TableIdentifier import 
org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ -import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project, ResolvedHint} +import org.apache.spark.sql.catalyst.plans.logical.{Filter, LogicalPlan, Project} import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, LogicalRelation, PruneFileSourcePartitions} import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat -import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec -import org.apache.spark.sql.functions.broadcast import org.apache.spark.sql.hive.test.TestHiveSingleton -import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType @@ -96,16 +91,4 @@ class PruneFileSourcePartitionsSuite extends QueryTest with SQLTestUtils with Te assert(size2 < tableStats.get.sizeInBytes) } } - - test("SPARK-26576 Broadcast hint not applied to partitioned table") { -withTable("tbl") { - withSQLConf(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1") { -spark.range(10).selectExpr("id", "id % 3 as p").write.partitionBy("p").saveAsTable("tbl") -val df = spark.table("tbl") -val qe = df.join(broadcast(df), "p").queryExecution -qe.optimizedPlan.collect { case _: ResolvedHint => } should have size 1 -qe.sparkPlan.collect { case j: BroadcastHashJoinExec => j } should have size 1 - } -} - } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/02: [SPARK-26120][TESTS][SS][SPARKR] Fix a streaming query leak in Structured Streaming R tests
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 2a82295bd4b904f50d42a7585a72f91fff75353d Author: Shixiong Zhu AuthorDate: Wed Nov 21 09:31:12 2018 +0800 [SPARK-26120][TESTS][SS][SPARKR] Fix a streaming query leak in Structured Streaming R tests ## What changes were proposed in this pull request? Stop the streaming query in `Specify a schema by using a DDL-formatted string when reading` to avoid outputting annoying logs. ## How was this patch tested? Jenkins Closes #23089 from zsxwing/SPARK-26120. Authored-by: Shixiong Zhu Signed-off-by: hyukjinkwon (cherry picked from commit 4b7f7ef5007c2c8a5090f22c6e08927e9f9a407b) Signed-off-by: Felix Cheung --- R/pkg/tests/fulltests/test_streaming.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/pkg/tests/fulltests/test_streaming.R b/R/pkg/tests/fulltests/test_streaming.R index bfb1a04..6f0d2ae 100644 --- a/R/pkg/tests/fulltests/test_streaming.R +++ b/R/pkg/tests/fulltests/test_streaming.R @@ -127,6 +127,7 @@ test_that("Specify a schema by using a DDL-formatted string when reading", { expect_false(awaitTermination(q, 5 * 1000)) callJMethod(q@ssq, "processAllAvailable") expect_equal(head(sql("SELECT count(*) FROM people3"))[[1]], 3) + stopQuery(q) expect_error(read.stream(path = parquetPath, schema = "name stri"), "DataType stri is not supported.") - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] tag v2.3.3-rc1 created (now b5ea933)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to tag v2.3.3-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git. at b5ea933 (commit) This tag includes the following new commits: new b5ea933 Preparing Spark release v2.3.3-rc1 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r32004 - /dev/spark/v2.3.3-rc1-docs/
Author: yamamuro Date: Thu Jan 17 04:51:37 2019 New Revision: 32004 Log: Removing RC artifacts. Removed: dev/spark/v2.3.3-rc1-docs/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r32005 - /dev/spark/v2.3.3-rc1-bin/
Author: yamamuro Date: Thu Jan 17 04:53:00 2019 New Revision: 32005 Log: Removing RC artifacts. Removed: dev/spark/v2.3.3-rc1-bin/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r32006 - in /dev/spark/v2.3.3-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _site/api/java/org/apache/spark
Author: yamamuro Date: Thu Jan 17 05:16:42 2019 New Revision: 32006 Log: Apache Spark v2.3.3-rc1 docs [This commit notification would consist of 1447 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.3 updated: [SPARK-24740][PYTHON][ML][BACKPORT-2.3] Make PySpark's tests compatible with NumPy 1.14+
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new bf3cdea [SPARK-24740][PYTHON][ML][BACKPORT-2.3] Make PySpark's tests compatible with NumPy 1.14+ bf3cdea is described below commit bf3cdeae3f27effb50f874cfe05f14192be47783 Author: hyukjinkwon AuthorDate: Sat Jan 19 13:09:44 2019 +0900 [SPARK-24740][PYTHON][ML][BACKPORT-2.3] Make PySpark's tests compatible with NumPy 1.14+ ## What changes were proposed in this pull request? This PR backported SPARK-24740 to branch-2.3; This PR proposes to make PySpark's tests compatible with NumPy 0.14+ NumPy 0.14.x introduced rather radical changes about its string representation. For example, the tests below are failed: ``` ** File "/.../spark/python/pyspark/ml/linalg/__init__.py", line 895, in __main__.DenseMatrix.__str__ Failed example: print(dm) Expected: DenseMatrix([[ 0., 2.], [ 1., 3.]]) Got: DenseMatrix([[0., 2.], [1., 3.]]) ** File "/.../spark/python/pyspark/ml/linalg/__init__.py", line 899, in __main__.DenseMatrix.__str__ Failed example: print(dm) Expected: DenseMatrix([[ 0., 1.], [ 2., 3.]]) Got: DenseMatrix([[0., 1.], [2., 3.]]) ** File "/.../spark/python/pyspark/ml/linalg/__init__.py", line 939, in __main__.DenseMatrix.toArray Failed example: m.toArray() Expected: array([[ 0., 2.], [ 1., 3.]]) Got: array([[0., 2.], [1., 3.]]) ** File "/.../spark/python/pyspark/ml/linalg/__init__.py", line 324, in __main__.DenseVector.dot Failed example: dense.dot(np.reshape([1., 2., 3., 4.], (2, 2), order='F')) Expected: array([ 5., 11.]) Got: array([ 5., 11.]) ** File "/.../spark/python/pyspark/ml/linalg/__init__.py", line 567, in __main__.SparseVector.dot Failed example: a.dot(np.array([[1, 1], [2, 2], [3, 3], [4, 4]])) Expected: array([ 22., 22.]) Got: array([22., 22.]) ``` See [release 
note](https://docs.scipy.org/doc/numpy-1.14.0/release.html#compatibility-notes). ## How was this patch tested? Manually tested: ``` $ ./run-tests --python-executables=python3.6,python2.7 --modules=pyspark-ml,pyspark-mllib Running PySpark tests. Output is in /.../spark/python/unit-tests.log Will test against the following Python executables: ['python3.6', 'python2.7'] Will test the following Python modules: ['pyspark-ml', 'pyspark-mllib'] Starting test(python2.7): pyspark.mllib.tests Starting test(python2.7): pyspark.ml.classification Starting test(python3.6): pyspark.mllib.tests Starting test(python2.7): pyspark.ml.clustering Finished test(python2.7): pyspark.ml.clustering (54s) Starting test(python2.7): pyspark.ml.evaluation Finished test(python2.7): pyspark.ml.classification (74s) Starting test(python2.7): pyspark.ml.feature Finished test(python2.7): pyspark.ml.evaluation (27s) Starting test(python2.7): pyspark.ml.fpm Finished test(python2.7): pyspark.ml.fpm (0s) Starting test(python2.7): pyspark.ml.image Finished test(python2.7): pyspark.ml.image (17s) Starting test(python2.7): pyspark.ml.linalg.__init__ Finished test(python2.7): pyspark.ml.linalg.__init__ (1s) Starting test(python2.7): pyspark.ml.recommendation Finished test(python2.7): pyspark.ml.feature (76s) Starting test(python2.7): pyspark.ml.regression Finished test(python2.7): pyspark.ml.recommendation (69s) Starting test(python2.7): pyspark.ml.stat Finished test(python2.7): pyspark.ml.regression (45s) Starting test(python2.7): pyspark.ml.tests Finished test(python2.7): pyspark.ml.stat (28s) Starting test(python2.7): pyspark.ml.tuning Finished test(python2.7): pyspark.ml.tuning (20s) Starting test(python2.7): pyspark.mllib.classification Finished test(python2.7): pyspark.mllib.classification (31s) Starting test(python2.7): pyspark.mllib.clustering Finished test(python2.7): pyspark.mllib.tests (260s) Starting test(python2.7): pyspark.mllib.evaluation Finished test(python3.6): pyspark.mllib.tests (266s) 
Starting test(python2.7): pyspark.mllib.feature Finis
svn commit: r32017 - /dev/spark/v2.3.3-rc1-bin/
Author: yamamuro Date: Thu Jan 17 15:53:20 2019 New Revision: 32017 Log: Apache Spark v2.3.3-rc1 Added: dev/spark/v2.3.3-rc1-bin/ dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz (with props) dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.asc dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.sha512 dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz (with props) dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.asc dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.sha512 dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.6.tgz (with props) dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.6.tgz.asc dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.6.tgz.sha512 dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.7.tgz (with props) dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.7.tgz.asc dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.7.tgz.sha512 dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-without-hadoop.tgz (with props) dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-without-hadoop.tgz.asc dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-without-hadoop.tgz.sha512 dev/spark/v2.3.3-rc1-bin/spark-2.3.3.tgz (with props) dev/spark/v2.3.3-rc1-bin/spark-2.3.3.tgz.asc dev/spark/v2.3.3-rc1-bin/spark-2.3.3.tgz.sha512 Added: dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.asc == --- dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.asc (added) +++ dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.asc Thu Jan 17 15:53:20 2019 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v1 + +iQIcBAABAgAGBQJcQHopAAoJEG7F8QUt8I/0gHMQAJ8tWZUAYncu7n/FTuXfAFdv +2gr6Q9u9zQNeajFMBWNApTuYZdrEzYPz+ocyhUShESRzjNvhQa3JyfWtqkLlGqvq +gLC0phytV6WXOsdqUHjFs8OCToSpB6hSvqurQOxWGmA0uCmagjmD0eEDnsoHxd2j +/pEcXLBwrc9yGhZmIk0Sz+yU7PaTXxiYZgCsLY6qPPna0SnF4UWWw0ltnPMABPB8 +QtyT2iI5RzjRndyU2G+yv1cwAeXwD6jNbtQkOWprAUGhPCsS2BXkAeDY4aDGupa4 +IYxQ3lwDERx3cIZGdY3xoKAKKQR29UALCh+jUX2d1rSF+IRRyv7tl0Jc+IIhyU/3 +gtpMw+JqiqVOCP8Feb5B/oGD+gnUtqAVLggsJa7FYRzzDhY2EqohOLMeqqErrYAZ +Ockt+6u8No7Tfbz71pMBX9UBB9HKnYpiocxqBhphAa0c9OjVw5hSAsUyoelIYnJ6 +QIyWrGLqNiNaTkIUm0IKL4ni19MSpesM05gpCRcLgyeidNRf4u3WFFxJQRrE5/eh +AiQRTAhUfP6GkTYc+wFj4WOsTZHKPg8UCfJ5awkof0E61QZ+zv5McUCXTMgJr5B0 +dNL2KPbG62/+626EjuU90G7wtlp+c4Ywi43ayV1LLo3RUQyu+zdNiSMvn8ksWEAZ +jS5CROEO6lIBaJKJ8WuW +=tGPH +-END PGP SIGNATURE- Added: dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.sha512 == --- dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.sha512 (added) +++ dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.sha512 Thu Jan 17 15:53:20 2019 @@ -0,0 +1,3 @@ +SparkR_2.3.3.tar.gz: 836F3DF8 4A020797 4F8B81D7 65629B34 54FCB070 5DDDF7AF + 940A68AB 3056ADBB 2F76B08C E0046BCE 68B86E5E 56B1F92E + FC7AAE37 A44964E5 15E7DA24 A5146234 Added: dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.asc == --- dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.asc (added) +++ dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.asc Thu Jan 17 15:53:20 2019 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v1 + +iQIcBAABAgAGBQJcQHouAAoJEG7F8QUt8I/0PLAP/2px9DN0ZDnE/Mpl4Yp/ltS1 +f/XEzdK7pxtGDgmOBRKWahKJAv7GUAlpLxAif8MBoq3V3GaWIQWcrXVuD2i8kVQl +E0hCZerwON+ns1HC/j7MEieSSxM/WBlDj5yMo23NZFlzi5lKdyOOtuIM4VeKbaaa +jbUjIGReM/zMmKsLtzPjh9p9OQ+BXcZ6xoKw/HO1FYDMn/CsqGqGIoEdzuND9KzD +6i3jVa3rzoZG4oH/e/N3tloCrwAeLUoL+4NKONzczWlYhRZW8gvBvoSSBLApi1nu +LQLvOlQKjybyxqPcfKWmtloKJyogYX1ZGdPzD5jc4yGYoKVX196Y1dndb6dzcUap +hf7JZ45Kx0WAsp/ZV5YY9bF+rS0EDZiHPQoyip3SqJizNaMXLxM2Wl+0zZGi0azf +06WjqvknC9YP85YBXsOthwy4+OipVbbggi7ihdiBiqat/N+PBa3+/UFkJhlrAPJD +I2/maEYfN1F3/NiBgwWosCRQw1yLCt1cF6NMPf3dto8JnBt4buu5W3VkIrnpstR6 +lqP5IBOYaBR1NuyvaXB50gbPWFDn3YRy7Ej0rPoXt31X0Nv3flinCwYnwyZa2bxC +7aTabfdWsx3vhWCcoWqid8wM/OawhwzqzGUbpGgeinqhWCXvAcgLcP+pDjnLpFOu +I9Ahvv3rYlwZNAWLovNc +=TUU/ +-END PGP SIGNATURE- Added: dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.sha512 == --- dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.sha512 (added) +++ dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.sha512
spark git commit: [MINOR][SQL] Combine the same codes in test cases
Repository: spark Updated Branches: refs/heads/master 261284842 -> 93f5592aa [MINOR][SQL] Combine the same codes in test cases ## What changes were proposed in this pull request? In the DDLSuite, there are four test cases have the same codes, writing a function can combine the same code. ## How was this patch tested? existing tests. Closes #23194 from CarolinePeng/Update_temp. Authored-by: 彭灿00244106 <00244106@zte.intra> Signed-off-by: Takeshi Yamamuro Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/93f5592a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/93f5592a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/93f5592a Branch: refs/heads/master Commit: 93f5592aa8c1254a93524fda81cf0e418c22cb2f Parents: 2612848 Author: 彭灿00244106 <00244106@zte.intra> Authored: Tue Dec 4 22:08:16 2018 +0900 Committer: Takeshi Yamamuro Committed: Tue Dec 4 22:08:16 2018 +0900 -- .../spark/sql/execution/command/DDLSuite.scala | 40 1 file changed, 16 insertions(+), 24 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/93f5592a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index 9d32fb6..052a5e7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -377,41 +377,41 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { } } - test("CTAS a managed table with the existing empty directory") { -val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) + private def withEmptyDirInTablePath(dirName: String)(f : File => Unit): Unit = { +val tableLoc = + new 
File(spark.sessionState.catalog.defaultTablePath(TableIdentifier(dirName))) try { tableLoc.mkdir() + f(tableLoc) +} finally { + waitForTasksToFinish() + Utils.deleteRecursively(tableLoc) +} + } + + + test("CTAS a managed table with the existing empty directory") { +withEmptyDirInTablePath("tab1") { tableLoc => withTable("tab1") { sql(s"CREATE TABLE tab1 USING ${dataSource} AS SELECT 1, 'a'") checkAnswer(spark.table("tab1"), Row(1, "a")) } -} finally { - waitForTasksToFinish() - Utils.deleteRecursively(tableLoc) } } test("create a managed table with the existing empty directory") { -val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) -try { - tableLoc.mkdir() +withEmptyDirInTablePath("tab1") { tableLoc => withTable("tab1") { sql(s"CREATE TABLE tab1 (col1 int, col2 string) USING ${dataSource}") sql("INSERT INTO tab1 VALUES (1, 'a')") checkAnswer(spark.table("tab1"), Row(1, "a")) } -} finally { - waitForTasksToFinish() - Utils.deleteRecursively(tableLoc) } } test("create a managed table with the existing non-empty directory") { withTable("tab1") { - val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab1"))) - try { -// create an empty hidden file -tableLoc.mkdir() + withEmptyDirInTablePath("tab1") { tableLoc => val hiddenGarbageFile = new File(tableLoc.getCanonicalPath, ".garbage") hiddenGarbageFile.createNewFile() val exMsg = "Can not create the managed table('`tab1`'). 
The associated location" @@ -439,28 +439,20 @@ abstract class DDLSuite extends QueryTest with SQLTestUtils { }.getMessage assert(ex.contains(exMsgWithDefaultDB)) } - } finally { -waitForTasksToFinish() -Utils.deleteRecursively(tableLoc) } } } test("rename a managed table with existing empty directory") { -val tableLoc = new File(spark.sessionState.catalog.defaultTablePath(TableIdentifier("tab2"))) -try { +withEmptyDirInTablePath("tab2") { tableLoc => withTable("tab1") { sql(s"CREATE TABLE tab1 USING $dataSource AS SELECT 1, 'a'") -tableLoc.mkdir() val ex = intercept[AnalysisException] {
[spark] branch master updated: [SPARK-26459][SQL] replace UpdateNullabilityInAttributeReferences with FixNullability
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 6955638 [SPARK-26459][SQL] replace UpdateNullabilityInAttributeReferences with FixNullability 6955638 is described below commit 6955638eae99cbe0a890a50e0c61c17641e7269f Author: Wenchen Fan AuthorDate: Thu Jan 10 20:15:25 2019 +0900 [SPARK-26459][SQL] replace UpdateNullabilityInAttributeReferences with FixNullability ## What changes were proposed in this pull request? This is a followup of https://github.com/apache/spark/pull/18576 The newly added rule `UpdateNullabilityInAttributeReferences` does the same thing the `FixNullability` does, we only need to keep one of them. This PR removes `UpdateNullabilityInAttributeReferences`, and use `FixNullability` to replace it. Also rename it to `UpdateAttributeNullability` ## How was this patch tested? existing tests Closes #23390 from cloud-fan/nullable. 
Authored-by: Wenchen Fan Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/analysis/Analyzer.scala | 38 +-- .../analysis/UpdateAttributeNullability.scala | 57 ++ .../spark/sql/catalyst/optimizer/Optimizer.scala | 18 +-- ...dateAttributeNullabilityInOptimizerSuite.scala} | 9 ++-- 4 files changed, 65 insertions(+), 57 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 2aa0f21..a84bb76 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -197,8 +197,8 @@ class Analyzer( PullOutNondeterministic), Batch("UDF", Once, HandleNullInputsForUDF), -Batch("FixNullability", Once, - FixNullability), +Batch("UpdateNullability", Once, + UpdateAttributeNullability), Batch("Subquery", Once, UpdateOuterReferences), Batch("Cleanup", fixedPoint, @@ -1822,40 +1822,6 @@ class Analyzer( } /** - * Fixes nullability of Attributes in a resolved LogicalPlan by using the nullability of - * corresponding Attributes of its children output Attributes. This step is needed because - * users can use a resolved AttributeReference in the Dataset API and outer joins - * can change the nullability of an AttribtueReference. Without the fix, a nullable column's - * nullable field can be actually set as non-nullable, which cause illegal optimization - * (e.g., NULL propagation) and wrong answers. - * See SPARK-13484 and SPARK-13801 for the concrete queries of this case. - */ - object FixNullability extends Rule[LogicalPlan] { - -def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp { - case p if !p.resolved => p // Skip unresolved nodes. 
- case p: LogicalPlan if p.resolved => -val childrenOutput = p.children.flatMap(c => c.output).groupBy(_.exprId).flatMap { - case (exprId, attributes) => -// If there are multiple Attributes having the same ExprId, we need to resolve -// the conflict of nullable field. We do not really expect this happen. -val nullable = attributes.exists(_.nullable) -attributes.map(attr => attr.withNullability(nullable)) -}.toSeq -// At here, we create an AttributeMap that only compare the exprId for the lookup -// operation. So, we can find the corresponding input attribute's nullability. -val attributeMap = AttributeMap[Attribute](childrenOutput.map(attr => attr -> attr)) -// For an Attribute used by the current LogicalPlan, if it is from its children, -// we fix the nullable field by using the nullability setting of the corresponding -// output Attribute from the children. -p.transformExpressions { - case attr: Attribute if attributeMap.contains(attr) => -attr.withNullability(attributeMap(attr).nullable) -} -} - } - - /** * Extracts [[WindowExpression]]s from the projectList of a [[Project]] operator and * aggregateExpressions of an [[Aggregate]] operator and creates individual [[Window]] * operators for every distinct [[WindowSpecDefinition]]. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/UpdateAttributeNullability.scala new file mode 100644 index 000..8655dec --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Update
svn commit: r31887 - /dev/spark/KEYS
Author: yamamuro Date: Fri Jan 11 06:45:33 2019 New Revision: 31887 Log: Update KEYS Modified: dev/spark/KEYS Modified: dev/spark/KEYS == --- dev/spark/KEYS (original) +++ dev/spark/KEYS Fri Jan 11 06:45:33 2019 @@ -829,3 +829,61 @@ aI9kX8V9gl5PZLw+LchGX5H7HKoRxZM3UbPkY5Mv ZIAzEigXrrsePyvHGf6H =6YJg -END PGP PUBLIC KEY BLOCK- + +pub rsa4096 2019-01-10 [SC] + 0E9925082727075EEE83D4B06EC5F1052DF08FF4 +uid [ultimate] Takeshi Yamamuro (CODE SIGNING KEY) +sub rsa4096 2019-01-10 [E] + +-BEGIN PGP PUBLIC KEY BLOCK- + +mQINBFw2q20BEADLW2BZbJO2YHmAmAumggCTm4aVWFRYH+NX0zqEX2bynA0GM5hR +euvLL6w5vq44S6zU+39o1s9wSDcBAqLNpPB2eDL8qqXKZa/AQTwCiitk9aDB1KZB +DzejoqtrtCK1WnCW7oB7mQIq+/txSyLgv1UgFijh2aAx0ChmMnb2WbeZAQz/5ids +ixMfZiRofZVJIjdNNe5kIBcc9uthoyLw3x16nLT3zrATtBSDAL8hAULOqXPMMf3T +xzm2cPnOnqFlKGkEWRuptnoPHJ8+Uwbb91oQmlFGolU9PvCQVdmtMWCmqvlg5SeZ +VSC+w4eUk8M2nWxPh+WrPP5eQMDVUdmWgC/ZzCoNW/AxY4T9G3h3XLpZoyoDEUmd +Xk95KiEq/fo2ZT2jF31tPsGPhlzGETnzDK1xdNtoFKqjvWxwdPmJgGBau2d30rxJ +gvrjMtvcJ8Z/L7D0hKR8r8eJB6GlfBTLARVQ/XygNS1sfR6+rv/kNFGR8932bNsf +OtxiAo1Ga3vn3Q3WK+9Ddz4HKhsoOwWYllRNE60xB2LGM7ZjvvY/I9Vx2Fqfew5z +MC1s3u1Bgu0FIepV+N0Qxs2yfavdfLSVCFZ2elXkyZ7vGAFikksgGRSLbYgx01Qx +gCx3nzYL1uol6s1z6jj039p/mEqSVMY1FiecmK3/inNMy4dLjg6s+Au+GwARAQAB +tDlUYWtlc2hpIFlhbWFtdXJvIChDT0RFIFNJR05JTkcgS0VZKSA8eWFtYW11cm9A +YXBhY2hlLm9yZz6JAk4EEwEIADgWIQQOmSUIJycHXu6D1LBuxfEFLfCP9AUCXDar +bQIbAwULCQgHAgYVCgkICwIEFgIDAQIeAQIXgAAKCRBuxfEFLfCP9OLWD/4uxh1M +3BiXxwqKoBbephYTI//iSVRmwSXQdm7fsPkZywc7K4W+jiYyf1Qe8mZ4ikNVnvcE +W7+FkLGWDFHcIXddXcruynrTeQ9YwO/RPY26qYGWfeXaIf7obVSRVT6wCg//rw/o +xglE2aBXM6kgEgcZRkIo5FeLxGK0VgQ56ANN4Aa4/Jev7/Fca+MkeXH6UlxnkMD5 +W/UgMWMEZFKJPXiLpxgmhzzq5T5ahvhRQfxtRAXz+w/SK+vo+jeZZ5/SqtDECsa4 +uG/iWjC5bNOsV97iCFx/KxNY5I4U4Q5svG6mz+IRgCMV3jpkslQfME2wXgC/k1bT +vr8ICOEzQguYgBYdXl99cMgy3ULPy1vbx4DycuKneKtkp25voy5rtU3+JBrxpwSa +TwD1gRiXFscZ5oomI3rn0jPq1dIKhrQaG0T2QwKn47spdPK0TWbec+SNo07dDaC0 +IsqgSZ1fkGk5ILTZ/AfYzdnHHeJ3IvrkVFLMMD35Rwcji8E85tMXV7GmlDejjMNk 
+QTQMQymXB+yRqIrHMAss1IY11UmQCtGSJfHwiAYW+iRBZfpB7fFvHMhwQFT4wEPW +St5JyUiRled8+1BtDUYeBjDr9UtAh/moD7xXtu8wiZjea87LUt+H/tTogsHWN/kJ +igCoSWXK5ugVy8sKI/Q+jQSgXzduChiTQQWIvrkCDQRcNqttARAA0WuzOkBGx6/S +0YV5GGwn0+Zqxhm0EV/G4cT+1IPKgiMTuTp/vRF7IDwZwh5oalG4Cl7YGygqEx/V +gHqtf0m1aFV4vndmmMaHKnYAl9/rk3Svu3BRXgu9sJPoMz3nDlRhcT3IvVPZw34E +PQg0tKhnAbvSwxpRL1jHhJgHTYmebja0UTSVr3NXAs8Z+XSEjZN//5B5m4N2UkUh +XVMzfDWaOa+EYlKmzhqIt6Q8/MNjFp7jeNOKUMBoIP0JKf3Y37M9NLolQihJ9RwE +2f0a8PN5xMVDJTcDMox+bXa0ohcYKiu6whIz82tg0hZmgtdg20lC15ZTXzJh3DRh +cklbMeLegwijHLuCBIgOtbuVknWqktx89Xdg9IG84eByDPxxuZwM9QNbfip9JHKH +Pv8M2W1wPMIIgIaRRzEu1NKUoZq14/Djn0t1hb2rjQarPOR3pqlO75TdMZJ8ZVK3 +OSUKWbLed+VI/X2I0iiH5Ag/Ajzh9qIqyKVxZI0Md7G7CWHfiVRHNzMlGP08z4sn +N6uu9vzL6GSiHU5cPtD34gPXMlWq42wXCat8GMMHZAdeCwhLVm3+wPucq8OO +S0cTmUzxdnMomUO9HST2a3aO8ulBhu4wh3Y+1gkxvJ19N+WsS6uBFBOnaWf3m1Y0 +2bKSKEtKunWfwfXHowyFwKpQF4cFClEAEQEAAYkCNgQYAQgAIBYhBA6ZJQgnJwde +7oPUsG7F8QUt8I/0BQJcNqttAhsMAAoJEG7F8QUt8I/0H54QAKtJvjP7dtCQF+pZ +oy9KgfdF0CSdpTwXbEn0VE/GcdkJxXoiDTTb9GVAm/ySpwRUcTub/jFjh3uKN1t5 +SbVUR6TfewhKZ5fsKqTbUKYXag+CRLy1n59RQPg9LcL6NwTk3+SJ4cLAnj0buVFa +nlZ0W2fC54TK2xvGcnU7S3dQdlyPuvR6ouNqzQxEuXTI0t9cXdQFpf8WLt0KknsH +kMEZpKWMnrfA5fusqiGQ+9GcjowvEc6tPiZ+bMJyJSj2kmTHnCU0krxPr/xuFfNa +YpJvIZFPwn9GKxejOcZVckKtdhXMmtFlwLnCcWuB0GRRQjd9r8R+KCJM6RlTp4yI +LBBWmPnJp0Sd/9xCdVZp1fFNZ+w72q5Z0l+6r+DuvThYhH5HdRxfmH33SzdpWEf8 +WcKCbbi9mN+2ZsJufR5LvKsNpv6DLTwCuMFlIptxSxGiYZxRYMKeZJ84AWHL7sit +ftDfwHakkfUZgprK5MBuEcjxXrsmcM25Ns+rhA80JCRmsqqreSC4M9XnKkya5hoJ +83pIuVIGxOVLhVWYkAGCqW+UVr1zBBBZYe8U3wDCFucHazqcaOHCUXAxM4rwpp/K +pqnGj9s6Uudh/FXfVN5MC0/pH/ySSACkXwCmKXAh2s8F9w199WRsNlya3Ce1Ryan +/G8Bpm/p4kbeqJtsx3t7nhPke7fG +=4noL +-END PGP PUBLIC KEY BLOCK- - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.3 updated: [SPARK-25572][SPARKR] test only if not cran
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new d397348 [SPARK-25572][SPARKR] test only if not cran d397348 is described below commit d397348b7bec20743f738694a135e4b67947fd99 Author: Felix Cheung AuthorDate: Sat Sep 29 14:48:32 2018 -0700 [SPARK-25572][SPARKR] test only if not cran ## What changes were proposed in this pull request? CRAN doesn't seem to respect the system requirements as running tests - we have seen cases where SparkR is run on Java 10, which unfortunately Spark does not start on. For 2.4, lets attempt skipping all tests ## How was this patch tested? manual, jenkins, appveyor Author: Felix Cheung Closes #22589 from felixcheung/ralltests. (cherry picked from commit f4b138082ff91be74b0f5bbe19cdb90dd9e5f131) Signed-off-by: Takeshi Yamamuro --- R/pkg/tests/run-all.R | 83 +++ 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 94d7518..1e96418 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -18,50 +18,55 @@ library(testthat) library(SparkR) -# Turn all warnings into errors -options("warn" = 2) +# SPARK-25572 +if (identical(Sys.getenv("NOT_CRAN"), "true")) { -if (.Platform$OS.type == "windows") { - Sys.setenv(TZ = "GMT") -} + # Turn all warnings into errors + options("warn" = 2) -# Setup global test environment -# Install Spark first to set SPARK_HOME + if (.Platform$OS.type == "windows") { +Sys.setenv(TZ = "GMT") + } -# NOTE(shivaram): We set overwrite to handle any old tar.gz files or directories left behind on -# CRAN machines. For Jenkins we should already have SPARK_HOME set. 
-install.spark(overwrite = TRUE) + # Setup global test environment + # Install Spark first to set SPARK_HOME -sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R") -sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db") -invisible(lapply(sparkRWhitelistSQLDirs, - function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)})) -sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) + # NOTE(shivaram): We set overwrite to handle any old tar.gz files or directories left behind on + # CRAN machines. For Jenkins we should already have SPARK_HOME set. + install.spark(overwrite = TRUE) -sparkRTestMaster <- "local[1]" -sparkRTestConfig <- list() -if (identical(Sys.getenv("NOT_CRAN"), "true")) { - sparkRTestMaster <- "" -} else { - # Disable hsperfdata on CRAN - old_java_opt <- Sys.getenv("_JAVA_OPTIONS") - Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) - tmpDir <- tempdir() - tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) - sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, - spark.executor.extraJavaOptions = tmpArg) -} + sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R") + sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db") + invisible(lapply(sparkRWhitelistSQLDirs, + function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)})) + sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) -test_package("SparkR") + sparkRTestMaster <- "local[1]" + sparkRTestConfig <- list() + if (identical(Sys.getenv("NOT_CRAN"), "true")) { +sparkRTestMaster <- "" + } else { +# Disable hsperfdata on CRAN +old_java_opt <- Sys.getenv("_JAVA_OPTIONS") +Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) +tmpDir <- tempdir() +tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) +sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, + spark.executor.extraJavaOptions = tmpArg) + } -if (identical(Sys.getenv("NOT_CRAN"), "true")) { - # set random seed for 
predictable results. mostly for base's sample() in tree and classification - set.seed(42) - # for testthat 1.0.2 later, change reporter from "summary" to default_reporter() - testthat:::run_tests("SparkR", - file.path(sparkRDir, "pkg", "tests", "fulltests"), - NULL, - "summary") -} + test_package("SparkR") + + if (identica
[spark] branch branch-2.3 updated (d397348 -> 01511e4)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git. discard d397348 [SPARK-25572][SPARKR] test only if not cran discard a9a1bc7 [SPARK-26010][R] fix vignette eval with Java 11 discard e46b0ed Preparing development version 2.3.4-SNAPSHOT discard 0e3d5fd Preparing Spark release v2.3.3-rc1 new 20b7490 [SPARK-26010][R] fix vignette eval with Java 11 new 01511e4 [SPARK-25572][SPARKR] test only if not cran This update added new revisions after undoing existing revisions. That is to say, some revisions that were in the old version of the branch are not in the new version. This situation occurs when a user --force pushes a change and generates a repository containing something like this: * -- * -- B -- O -- O -- O (d397348) \ N -- N -- N refs/heads/branch-2.3 (01511e4) You should already have received notification emails for all of the O revisions, and so the following emails describe only the N revisions from the common base, B. Any revisions marked "omit" are not gone; other references still refer to them. Any revisions marked "discard" are gone forever. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. 
Summary of changes: R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 02/02: [SPARK-25572][SPARKR] test only if not cran
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 01511e479013c56d70fe8ffa805ecbd66591b57e Author: Felix Cheung AuthorDate: Sat Sep 29 14:48:32 2018 -0700 [SPARK-25572][SPARKR] test only if not cran ## What changes were proposed in this pull request? CRAN doesn't seem to respect the system requirements as running tests - we have seen cases where SparkR is run on Java 10, which unfortunately Spark does not start on. For 2.4, lets attempt skipping all tests ## How was this patch tested? manual, jenkins, appveyor Author: Felix Cheung Closes #22589 from felixcheung/ralltests. (cherry picked from commit f4b138082ff91be74b0f5bbe19cdb90dd9e5f131) Signed-off-by: Takeshi Yamamuro --- R/pkg/tests/run-all.R | 83 +++ 1 file changed, 44 insertions(+), 39 deletions(-) diff --git a/R/pkg/tests/run-all.R b/R/pkg/tests/run-all.R index 94d7518..1e96418 100644 --- a/R/pkg/tests/run-all.R +++ b/R/pkg/tests/run-all.R @@ -18,50 +18,55 @@ library(testthat) library(SparkR) -# Turn all warnings into errors -options("warn" = 2) +# SPARK-25572 +if (identical(Sys.getenv("NOT_CRAN"), "true")) { -if (.Platform$OS.type == "windows") { - Sys.setenv(TZ = "GMT") -} + # Turn all warnings into errors + options("warn" = 2) -# Setup global test environment -# Install Spark first to set SPARK_HOME + if (.Platform$OS.type == "windows") { +Sys.setenv(TZ = "GMT") + } -# NOTE(shivaram): We set overwrite to handle any old tar.gz files or directories left behind on -# CRAN machines. For Jenkins we should already have SPARK_HOME set. 
-install.spark(overwrite = TRUE) + # Setup global test environment + # Install Spark first to set SPARK_HOME -sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R") -sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db") -invisible(lapply(sparkRWhitelistSQLDirs, - function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)})) -sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) + # NOTE(shivaram): We set overwrite to handle any old tar.gz files or directories left behind on + # CRAN machines. For Jenkins we should already have SPARK_HOME set. + install.spark(overwrite = TRUE) -sparkRTestMaster <- "local[1]" -sparkRTestConfig <- list() -if (identical(Sys.getenv("NOT_CRAN"), "true")) { - sparkRTestMaster <- "" -} else { - # Disable hsperfdata on CRAN - old_java_opt <- Sys.getenv("_JAVA_OPTIONS") - Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) - tmpDir <- tempdir() - tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) - sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, - spark.executor.extraJavaOptions = tmpArg) -} + sparkRDir <- file.path(Sys.getenv("SPARK_HOME"), "R") + sparkRWhitelistSQLDirs <- c("spark-warehouse", "metastore_db") + invisible(lapply(sparkRWhitelistSQLDirs, + function(x) { unlink(file.path(sparkRDir, x), recursive = TRUE, force = TRUE)})) + sparkRFilesBefore <- list.files(path = sparkRDir, all.files = TRUE) -test_package("SparkR") + sparkRTestMaster <- "local[1]" + sparkRTestConfig <- list() + if (identical(Sys.getenv("NOT_CRAN"), "true")) { +sparkRTestMaster <- "" + } else { +# Disable hsperfdata on CRAN +old_java_opt <- Sys.getenv("_JAVA_OPTIONS") +Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt)) +tmpDir <- tempdir() +tmpArg <- paste0("-Djava.io.tmpdir=", tmpDir) +sparkRTestConfig <- list(spark.driver.extraJavaOptions = tmpArg, + spark.executor.extraJavaOptions = tmpArg) + } -if (identical(Sys.getenv("NOT_CRAN"), "true")) { - # set random seed for 
predictable results. mostly for base's sample() in tree and classification - set.seed(42) - # for testthat 1.0.2 later, change reporter from "summary" to default_reporter() - testthat:::run_tests("SparkR", - file.path(sparkRDir, "pkg", "tests", "fulltests"), - NULL, - "summary") -} + test_package("SparkR") + + if (identical(Sys.getenv("NOT_CRAN"), "true")) { +# set random seed for predictable results. mostly for base's sample() in tree and classification +set.seed(42) +
[spark] 01/02: [SPARK-26010][R] fix vignette eval with Java 11
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 20b749021bacaa2906775944e43597ccf37af62b Author: Felix Cheung AuthorDate: Mon Nov 12 19:03:30 2018 -0800 [SPARK-26010][R] fix vignette eval with Java 11 ## What changes were proposed in this pull request? changes in vignette only to disable eval ## How was this patch tested? Jenkins Author: Felix Cheung Closes #23007 from felixcheung/rjavavervig. (cherry picked from commit 88c82627267a9731b2438f0cc28dd656eb3dc834) Signed-off-by: Felix Cheung --- R/pkg/vignettes/sparkr-vignettes.Rmd | 14 ++ 1 file changed, 14 insertions(+) diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd index d4713de..70970bd 100644 --- a/R/pkg/vignettes/sparkr-vignettes.Rmd +++ b/R/pkg/vignettes/sparkr-vignettes.Rmd @@ -57,6 +57,20 @@ First, let's load and attach the package. library(SparkR) ``` +```{r, include=FALSE} +# disable eval if java version not supported +override_eval <- tryCatch(!is.numeric(SparkR:::checkJavaVersion()), + error = function(e) { TRUE }, + warning = function(e) { TRUE }) + +if (override_eval) { + opts_hooks$set(eval = function(options) { +options$eval = FALSE +options + }) +} +``` + `SparkSession` is the entry point into SparkR which connects your R program to a Spark cluster. You can create a `SparkSession` using `sparkR.session` and pass in options such as the application name, any Spark packages depended on, etc. We use default settings in which it runs in local mode. It auto downloads Spark package in the background if no previous installation is found. For more details about setup, see [Spark Session](#SetupSparkSession). - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing development version 2.3.4-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 3137dca44b69a0b224842dd727c96c6b5bb0430d Author: Takeshi Yamamuro AuthorDate: Sun Jan 13 01:57:05 2019 + Preparing development version 2.3.4-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 6ec4966..a82446e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.3.3 +Version: 2.3.4 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index 6a8cd4f..612a1b8 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 6010b6e..5547e97 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 8b5d3c8..119dde2 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index dd27a24..dba5224 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index aded5e7d..56902a3 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index a50f612..5302d95 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 8112ca4..232ebfa 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0d5f61f..f0baa2a 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache
[spark] branch branch-2.3 updated (01511e4 -> 3137dca)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git. from 01511e4 [SPARK-25572][SPARKR] test only if not cran add 2e01a70 Preparing Spark release v2.3.3-rc1 new 3137dca Preparing development version 2.3.4-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional 
commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing Spark release v2.3.3-rc1
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to tag v2.3.3-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git commit 2e01a70bfac7aedfd5992d49e13a9f8f6a92d8a2 Author: Takeshi Yamamuro AuthorDate: Sun Jan 13 01:56:48 2019 + Preparing Spark release v2.3.3-rc1 --- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 40 files changed, 40 insertions(+), 40 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index f8b15cc..6a8cd4f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index e412a47..6010b6e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ 
-22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index d8f9a3d..8b5d3c8 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index a1a4f87..dd27a24 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index e650978..aded5e7d 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 350e3cb..a50f612 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index e7fea41..8112ca4 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 601cc5d..0d5f61f 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 2a7e644..930128d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 7629f5f..8e9c3b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,7 +14,7 @@ include: # These allow
[spark] tag v2.3.3-rc1 created (now 2e01a70)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to tag v2.3.3-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git. at 2e01a70 (commit) This tag includes the following new commits: new 2e01a70 Preparing Spark release v2.3.3-rc1 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing Spark release v2.3.3-rc1
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to tag v2.3.3-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git commit 0e3d5fd960927dd8ff1a909aba98b85fb9350c58 Author: Takeshi Yamamuro AuthorDate: Sun Jan 13 00:25:46 2019 + Preparing Spark release v2.3.3-rc1 --- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 40 files changed, 40 insertions(+), 40 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index f8b15cc..6a8cd4f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index e412a47..6010b6e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ 
-22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index d8f9a3d..8b5d3c8 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index a1a4f87..dd27a24 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index e650978..aded5e7d 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 350e3cb..a50f612 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index e7fea41..8112ca4 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 601cc5d..0d5f61f 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 2a7e644..930128d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3-SNAPSHOT +2.3.3 ../pom.xml diff --git a/docs/_config.yml b/docs/_config.yml index 7629f5f..8e9c3b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -14,7 +14,7 @@ include: # These allow
[spark] 01/01: Preparing development version 2.3.4-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit e46b0edd1046329fa3e3a730d59a6a263f72cbd0 Author: Takeshi Yamamuro AuthorDate: Sun Jan 13 00:26:02 2019 + Preparing development version 2.3.4-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 6ec4966..a82446e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.3.3 +Version: 2.3.4 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index 6a8cd4f..612a1b8 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 6010b6e..5547e97 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 8b5d3c8..119dde2 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index dd27a24..dba5224 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index aded5e7d..56902a3 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index a50f612..5302d95 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 8112ca4..232ebfa 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0d5f61f..f0baa2a 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache
[spark] branch branch-2.3 updated (6d063ee -> e46b0ed)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git. from 6d063ee [SPARK-26538][SQL] Set default precision and scale for elements of postgres numeric array add 0e3d5fd Preparing Spark release v2.3.3-rc1 new e46b0ed Preparing development version 2.3.4-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) - To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] tag v2.3.3-rc1 created (now 0e3d5fd)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to tag v2.3.3-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git. at 0e3d5fd (commit) This tag includes the following new commits: new 0e3d5fd Preparing Spark release v2.3.3-rc1 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] tag v2.3.3-rc1 deleted (was 0e3d5fd)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to tag v2.3.3-rc1 in repository https://gitbox.apache.org/repos/asf/spark.git. *** WARNING: tag v2.3.3-rc1 was deleted! *** was 0e3d5fd Preparing Spark release v2.3.3-rc1 The revisions that were on this tag are still contained in other references; therefore, this change does not discard any commits from the repository. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r31941 - /dev/spark/v2.3.3-rc1-bin/
Author: yamamuro Date: Sun Jan 13 13:00:54 2019 New Revision: 31941 Log: Apache Spark v2.3.3-rc1 Added: dev/spark/v2.3.3-rc1-bin/ dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz (with props) dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.asc dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.sha512 dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz (with props) dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.asc dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.sha512 dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.6.tgz (with props) dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.6.tgz.asc dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.6.tgz.sha512 dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.7.tgz (with props) dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.7.tgz.asc dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-hadoop2.7.tgz.sha512 dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-without-hadoop.tgz (with props) dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-without-hadoop.tgz.asc dev/spark/v2.3.3-rc1-bin/spark-2.3.3-bin-without-hadoop.tgz.sha512 dev/spark/v2.3.3-rc1-bin/spark-2.3.3.tgz (with props) dev/spark/v2.3.3-rc1-bin/spark-2.3.3.tgz.asc dev/spark/v2.3.3-rc1-bin/spark-2.3.3.tgz.sha512 Added: dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.asc == --- dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.asc (added) +++ dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.asc Sun Jan 13 13:00:54 2019 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v1 + +iQIcBAABAgAGBQJcOwjWAAoJEG7F8QUt8I/040oP/3LeYlekdoGa2+S+TIoLD34V +DzyuVjHdHNjV5FEBXChd4hW1x+ZQng7KHxLEzuWV1DFYLlG1JQShWou3JHXK4i5c +pBlDpVeNmLT4t2qLiARf+7D+hdPBUHI0h4PvQRN5X+I3BWl0cTAvAKIV5AKZK8dF +E8sM+u7o7RbJ/s6SBAE3oDzGOSLXTcK1xG8X3WIXZrQS8rPwtWQvlnigPYUOc0Ef +NI0mLJ967Lt+EoTyy3JiOYwh5TexYmSR6f7o1xeniCtmS41BBX5iv89XLV6iryi8 +aFrCadlKFnm+Nd9lR+oKMXmSKu0jXbzjI6tOH+C43pjeZK49wxh/PMlHFYqehN8j +r8LnC72nyT8+Lc41bVSyj7z/dBr9XsonEOYkt/5BM5FgwVCRGWVZlgAHJ0GtJz+N +lsZm0Bo7CGvFHH8M3kogPQ+X0q62be9Bc5T7bCnIlXOKYOGtOK80Ejmm5bj1Kkc5 +fKmUcZ/faBpU3svNbTrdsUYxNPWMkw4nkSyVP24LDaMpwD9B3S/D3CF5VnprApW7 +tzlWwbhTSHOENfE2Jj6THZ8IIcLQf2QAFnNGtnUg5ehMt+1AGhJYPA0TtLEAeKO2 +0O9MqgWgoTO4SqBVoDevj1odSOHg1CMY1DFkyEPOqPWSgdAsng9ngSouVCqvVw3G +rrNQlTIbYFlMKdorQOHa +=Eqvf +-END PGP SIGNATURE- Added: dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.sha512 == --- dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.sha512 (added) +++ dev/spark/v2.3.3-rc1-bin/SparkR_2.3.3.tar.gz.sha512 Sun Jan 13 13:00:54 2019 @@ -0,0 +1,3 @@ +SparkR_2.3.3.tar.gz: 08B0721E 27126811 09C2ED6D D3F12B80 5F892113 58B3C528 + 80738E46 F888B41C EB9492B4 156E1A85 DFDEC6C1 3696E341 + 767A4976 1DA3A4A2 EF3F6DDC 2AFB295F Added: dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.asc == --- dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.asc (added) +++ dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.asc Sun Jan 13 13:00:54 2019 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v1 + +iQIcBAABAgAGBQJcOwjZAAoJEG7F8QUt8I/0ETYP/2zyxOg8P26aNwVk1BCKEj2Q +rSrcef5LY4k8mFPPJQdIyqnNaJrrY5WZXWISyklen6e2vEyYQzGoXAiXpwxEShPx +CTsbiQTJ7jrhXoRe5UVI0jboz1LTF6xY6OOeZRWG8pwaCttlELt0uNEE3q1ZC7L/ +P/LA+lhan8avnfDS+7b0pPdAiomA7QAIoC+5DoA/I4Cyv2TlHSrDfrAWXjERrgJG +q0pRNCKqMhgWolGsrKeHS/PjYJjmCVRlCygOgn1iLBa6bhQoMPuUgsaNMQ6LuwdJ +cMltrLJvMAW0Chm4RoECSqTjoPRxGrZ2OlQ8brllDqQBFDAfpf5A+8MVqYBbCguu +zPBVQkEbT2tLcH9mdUP4jcX7xIcSio3b5jmST+BiPXS1eXsnUxW+yrGuLJkX7XEN +kNI73gwJCvdUEUQ/NEQOopyalAQIIfUAnP3Uj8K6rm/ll6OMJa/UTJ04RkR2wzlY +EiNQcuvnboU549KY2xqzRI6h3GEVYWKH9gnyhI7CvQRLDJz8nV4mOulIh8ZrFSzv +FUhnIYeDLu5F39G+1lbKeYnoti/yopaxxh/Uxz0AWSYhTTXStbYDhG9d/7jUGxwH +djZuN0eV2lnTGGW+mYzih3QUdcp/vG7lJir2TEsGhcKhoxaKfSyxmtClBZ9p5TOF +pCKMj0eKdzkvpi7VFimI +=8i3D +-END PGP SIGNATURE- Added: dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.sha512 == --- dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.sha512 (added) +++ dev/spark/v2.3.3-rc1-bin/pyspark-2.3.3.tar.gz.sha512
svn commit: r31943 - in /dev/spark/v2.3.3-rc1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _site/api/java/org/apache/spark
Author: yamamuro Date: Sun Jan 13 15:35:15 2019 New Revision: 31943 Log: Apache Spark v2.3.3-rc1 docs [This commit notification would consist of 1447 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26894][SQL] Handle Alias as well in AggregateEstimation to propagate child stats
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b1857a4 [SPARK-26894][SQL] Handle Alias as well in AggregateEstimation to propagate child stats b1857a4 is described below commit b1857a4d7dfe17663f8adccd7825d890ae70d2a1 Author: Venkata krishnan Sowrirajan AuthorDate: Thu Mar 21 11:21:56 2019 +0900 [SPARK-26894][SQL] Handle Alias as well in AggregateEstimation to propagate child stats ## What changes were proposed in this pull request? Currently aliases are not handled in the Aggregate Estimation due to which stats are not getting propagated. This causes CBO join-reordering to not give optimal join plans. ProjectEstimation is already taking care of aliases, we need same logic for AggregateEstimation as well to properly propagate stats when CBO is enabled. ## How was this patch tested? This patch is manually tested using the query Q83 of TPCDS benchmark (scale 1000) Closes #23803 from venkata91/aggstats. 
Authored-by: Venkata krishnan Sowrirajan Signed-off-by: Takeshi Yamamuro --- .../statsEstimation/AggregateEstimation.scala | 7 +-- .../logical/statsEstimation/EstimationUtils.scala | 14 - .../statsEstimation/ProjectEstimation.scala| 10 +++-- .../statsEstimation/AggregateEstimationSuite.scala | 24 ++ 4 files changed, 45 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala index eb56ab4..0606d0d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/AggregateEstimation.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql.catalyst.plans.logical.statsEstimation -import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap} import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, Statistics} @@ -52,7 +52,10 @@ object AggregateEstimation { outputRows.min(childStats.rowCount.get) } - val outputAttrStats = getOutputMap(childStats.attributeStats, agg.output) + val aliasStats = EstimationUtils.getAliasStats(agg.expressions, childStats.attributeStats) + + val outputAttrStats = getOutputMap( +AttributeMap(childStats.attributeStats.toSeq ++ aliasStats), agg.output) Some(Statistics( sizeInBytes = getOutputSize(agg.output, outputRows, outputAttrStats), rowCount = Some(outputRows), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala index 211a2a0..11d2f02 100644 --- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/EstimationUtils.scala @@ -20,7 +20,7 @@ package org.apache.spark.sql.catalyst.plans.logical.statsEstimation import scala.collection.mutable.ArrayBuffer import scala.math.BigDecimal.RoundingMode -import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeMap} +import org.apache.spark.sql.catalyst.expressions.{Alias, Attribute, AttributeMap, Expression} import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.types.{DecimalType, _} @@ -71,6 +71,18 @@ object EstimationUtils { AttributeMap(output.flatMap(a => inputMap.get(a).map(a -> _))) } + /** + * Returns the stats for aliases of child's attributes + */ + def getAliasStats( + expressions: Seq[Expression], + attributeStats: AttributeMap[ColumnStat]): Seq[(Attribute, ColumnStat)] = { +expressions.collect { + case alias @ Alias(attr: Attribute, _) if attributeStats.contains(attr) => +alias.toAttribute -> attributeStats(attr) +} + } + def getSizePerRow( attributes: Seq[Attribute], attrStats: AttributeMap[ColumnStat] = AttributeMap(Nil)): BigInt = { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scala index 489eb90..6925423 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/ProjectEstimation.scal
[spark] branch master updated: [SPARK-26771][SQL][FOLLOWUP] Make all the uncache operations non-blocking by default
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 956b52b1 [SPARK-26771][SQL][FOLLOWUP] Make all the uncache operations non-blocking by default 956b52b1 is described below commit 956b52b1670985a67e49b938ac1499ae65c79f6e Author: Takeshi Yamamuro AuthorDate: Wed Mar 27 21:01:36 2019 +0900 [SPARK-26771][SQL][FOLLOWUP] Make all the uncache operations non-blocking by default ## What changes were proposed in this pull request? To make the blocking behaviour consistent, this pr made catalog table/view `uncacheQuery` non-blocking by default. If this pr merged, all the behaviours in spark are non-blocking by default. ## How was this patch tested? Pass Jenkins. Closes #24212 from maropu/SPARK-26771-FOLLOWUP. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../main/scala/org/apache/spark/sql/Dataset.scala | 3 +- .../apache/spark/sql/execution/CacheManager.scala | 8 +-- .../sql/execution/columnar/InMemoryRelation.scala | 2 +- .../apache/spark/sql/internal/CatalogImpl.scala| 6 +- .../org/apache/spark/sql/CachedTableSuite.scala| 69 +- .../apache/spark/sql/hive/CachedTableSuite.scala | 15 - 6 files changed, 62 insertions(+), 41 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 2accb32..69c2f61 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2956,7 +2956,8 @@ class Dataset[T] private[sql]( * @since 1.6.0 */ def unpersist(blocking: Boolean): this.type = { -sparkSession.sharedState.cacheManager.uncacheQuery(this, cascade = false, blocking) +sparkSession.sharedState.cacheManager.uncacheQuery( + sparkSession, logicalPlan, cascade = false, blocking) this } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index 0145478..d1f096b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -98,13 +98,11 @@ class CacheManager extends Logging { * @param query The [[Dataset]] to be un-cached. * @param cascade If true, un-cache all the cache entries that refer to the given * [[Dataset]]; otherwise un-cache the given [[Dataset]] only. - * @param blocking Whether to block until all blocks are deleted. */ def uncacheQuery( query: Dataset[_], - cascade: Boolean, - blocking: Boolean = true): Unit = { -uncacheQuery(query.sparkSession, query.logicalPlan, cascade, blocking) + cascade: Boolean): Unit = { +uncacheQuery(query.sparkSession, query.logicalPlan, cascade) } /** @@ -119,7 +117,7 @@ class CacheManager extends Logging { spark: SparkSession, plan: LogicalPlan, cascade: Boolean, - blocking: Boolean): Unit = { + blocking: Boolean = false): Unit = { val shouldRemove: LogicalPlan => Boolean = if (cascade) { _.find(_.sameResult(plan)).isDefined diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 1e4453f..1af5033 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -65,7 +65,7 @@ case class CachedRDDBuilder( _cachedColumnBuffers } - def clearCache(blocking: Boolean = true): Unit = { + def clearCache(blocking: Boolean = false): Unit = { if (_cachedColumnBuffers != null) { synchronized { if (_cachedColumnBuffers != null) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala index 4698e8a..5e7d17b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/internal/CatalogImpl.scala @@ -365,7 +365,7 @@ class CatalogImpl(sparkSession: SparkSession) extends Catalog { override def dropTempView(viewName: String): Boolean = { sparkSession.sessionState.catalog.getTempView(viewName).exists { viewDef => sparkSession.sharedState.cacheManager.uncacheQuery( -sparkSession, viewDef, cascade = false, blocking = true) +sparkSession, viewDef, cascade = false) sessionCatalog.dropTempView(viewName) } } @@
[spark] branch master updated: [SPARK-27288][SQL] Pruning nested field in complex map key from object serializers
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 93ff690 [SPARK-27288][SQL] Pruning nested field in complex map key from object serializers 93ff690 is described below commit 93ff69003b228abcf08da4488593f552e3a61665 Author: Liang-Chi Hsieh AuthorDate: Wed Mar 27 19:40:14 2019 +0900 [SPARK-27288][SQL] Pruning nested field in complex map key from object serializers ## What changes were proposed in this pull request? In the original PR #24158, pruning nested field in complex map key was not supported, because some methods in schema pruning didn't support it at that moment. This is a follow-up to add it. ## How was this patch tested? Added tests. Closes #24220 from viirya/SPARK-26847-followup. Authored-by: Liang-Chi Hsieh Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/catalyst/optimizer/objects.scala | 13 ++--- .../optimizer/ObjectSerializerPruningSuite.scala | 5 +++-- .../apache/spark/sql/DatasetOptimizationSuite.scala | 19 ++- 3 files changed, 31 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala index 8e92421..c48bd8f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/objects.scala @@ -131,8 +131,8 @@ object ObjectSerializerPruning extends Rule[LogicalPlan] { fields.map(f => collectStructType(f.dataType, structs)) case ArrayType(elementType, _) => collectStructType(elementType, structs) - case MapType(_, valueType, _) => -// Because we can't select a field from struct in key, so we skip key type. 
+ case MapType(keyType, valueType, _) => +collectStructType(keyType, structs) collectStructType(valueType, structs) // We don't use UserDefinedType in those serializers. case _: UserDefinedType[_] => @@ -179,13 +179,20 @@ object ObjectSerializerPruning extends Rule[LogicalPlan] { val transformedSerializer = serializer.transformDown { case m: ExternalMapToCatalyst => +val prunedKeyConverter = m.keyConverter.transformDown { + case s: CreateNamedStruct if structTypeIndex < prunedStructTypes.size => +val prunedType = prunedStructTypes(structTypeIndex) +structTypeIndex += 1 +pruneNamedStruct(s, prunedType) +} val prunedValueConverter = m.valueConverter.transformDown { case s: CreateNamedStruct if structTypeIndex < prunedStructTypes.size => val prunedType = prunedStructTypes(structTypeIndex) structTypeIndex += 1 pruneNamedStruct(s, prunedType) } -m.copy(valueConverter = alignNullTypeInIf(prunedValueConverter)) +m.copy(keyConverter = alignNullTypeInIf(prunedKeyConverter), + valueConverter = alignNullTypeInIf(prunedValueConverter)) case s: CreateNamedStruct if structTypeIndex < prunedStructTypes.size => val prunedType = prunedStructTypes(structTypeIndex) structTypeIndex += 1 diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ObjectSerializerPruningSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ObjectSerializerPruningSuite.scala index fb0f3a3..0dd4d6a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ObjectSerializerPruningSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ObjectSerializerPruningSuite.scala @@ -60,8 +60,9 @@ class ObjectSerializerPruningSuite extends PlanTest { Seq(StructType.fromDDL("a struct, b int"), StructType.fromDDL("a int, b int")), Seq(StructType.fromDDL("a int, b int, c string")), - Seq.empty[StructType], - Seq(StructType.fromDDL("c long, d string")) + Seq(StructType.fromDDL("a struct, b int"), +StructType.fromDDL("a 
int, b int")), + Seq(StructType.fromDDL("a int, b int"), StructType.fromDDL("c long, d string")) ) dataTypes.zipWithIndex.foreach { case (dt, idx) => diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala index 69634f8..cfbb343 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetOptimizationSui
[spark] branch master updated: [SPARK-25196][SQL][FOLLOWUP] Fix wrong tests in StatisticsCollectionSuite
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b8a0f98 [SPARK-25196][SQL][FOLLOWUP] Fix wrong tests in StatisticsCollectionSuite b8a0f98 is described below commit b8a0f981f2f69cb7ea56626b6ed3143276beb824 Author: Takeshi Yamamuro AuthorDate: Mon Mar 25 21:02:01 2019 +0900 [SPARK-25196][SQL][FOLLOWUP] Fix wrong tests in StatisticsCollectionSuite ## What changes were proposed in this pull request? This is a follow-up of #24047 and it fixed wrong tests in `StatisticsCollectionSuite`. ## How was this patch tested? Pass Jenkins. Closes #24198 from maropu/SPARK-25196-FOLLOWUP-2. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala index b76678f..d071efb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala @@ -539,9 +539,9 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared withTempDatabase { database => sql(s"CREATE VIEW $database.v AS SELECT 1 c") sql(s"CACHE TABLE $database.v") - assert(getStatAttrNames(s"$database.v") !== Set("id")) + assert(getStatAttrNames(s"$database.v") !== Set("c")) sql(s"ANALYZE TABLE $database.v COMPUTE STATISTICS FOR COLUMNS c") - assert(getStatAttrNames(s"$database.v") !== Set("id")) + assert(getStatAttrNames(s"$database.v") === Set("c")) } } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26660][FOLLOWUP] Raise task serialized size warning threshold to 1000 KiB
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 3a8398d [SPARK-26660][FOLLOWUP] Raise task serialized size warning threshold to 1000 KiB 3a8398d is described below commit 3a8398df5cf87f597e672bfbb8c6eadbad800d03 Author: Sean Owen AuthorDate: Wed Mar 27 10:42:26 2019 +0900 [SPARK-26660][FOLLOWUP] Raise task serialized size warning threshold to 1000 KiB ## What changes were proposed in this pull request? Raise the threshold size for serialized task size at which a warning is generated from 100KiB to 1000KiB. As several people have noted, the original change for this JIRA highlighted that this threshold is low. Test output regularly shows: ``` - sorting on StringType with nullable=false, sortOrder=List('a DESC NULLS LAST) 22:47:53.320 WARN org.apache.spark.scheduler.TaskSetManager: Stage 80 contains a task of very large size (755 KiB). The maximum recommended task size is 100 KiB. 22:47:53.348 WARN org.apache.spark.scheduler.TaskSetManager: Stage 81 contains a task of very large size (755 KiB). The maximum recommended task size is 100 KiB. 22:47:53.417 WARN org.apache.spark.scheduler.TaskSetManager: Stage 83 contains a task of very large size (755 KiB). The maximum recommended task size is 100 KiB. 22:47:53.444 WARN org.apache.spark.scheduler.TaskSetManager: Stage 84 contains a task of very large size (755 KiB). The maximum recommended task size is 100 KiB. ... 
- SPARK-20688: correctly check analysis for scalar sub-queries 22:49:10.314 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 150.8 KiB - SPARK-21835: Join in correlated subquery should be duplicateResolved: case 1 22:49:10.595 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 150.7 KiB 22:49:10.744 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 150.7 KiB 22:49:10.894 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 150.7 KiB - SPARK-21835: Join in correlated subquery should be duplicateResolved: case 2 - SPARK-21835: Join in correlated subquery should be duplicateResolved: case 3 - SPARK-23316: AnalysisException after max iteration reached for IN query 22:49:11.559 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 154.2 KiB ``` It seems that a larger threshold of about 1MB is more suitable. ## How was this patch tested? Existing tests. Closes #24226 from srowen/SPARK-26660.2. Authored-by: Sean Owen Signed-off-by: Takeshi Yamamuro --- core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index ea31fe8..3977c0b 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -,5 +,5 @@ private[spark] class TaskSetManager( private[spark] object TaskSetManager { // The user will be warned if any stages contain a task that has a serialized size greater than // this. - val TASK_SIZE_TO_WARN_KIB = 100 + val TASK_SIZE_TO_WARN_KIB = 1000 } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26851][SQL][FOLLOWUP] Fix cachedColumnBuffers field for Scala 2.11 build
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 95bb012 [SPARK-26851][SQL][FOLLOWUP] Fix cachedColumnBuffers field for Scala 2.11 build 95bb012 is described below commit 95bb01282cc94f95bbc69aafcbc1550b137238be Author: Sean Owen AuthorDate: Fri Feb 22 15:22:52 2019 +0900 [SPARK-26851][SQL][FOLLOWUP] Fix cachedColumnBuffers field for Scala 2.11 build ## What changes were proposed in this pull request? Per https://github.com/apache/spark/pull/23768/files#r259083019 the last change to this line here caused the 2.11 build to fail. It's worked around by making `_cachedColumnBuffers` a field, as it was never set by callers to anything other than its default of null. ## How was this patch tested? Existing tests. Closes #23864 from srowen/SPARK-26851.2. Authored-by: Sean Owen Signed-off-by: Takeshi Yamamuro --- .../main/scala/org/apache/spark/sql/execution/CacheManager.scala | 3 +-- .../org/apache/spark/sql/execution/columnar/InMemoryRelation.scala | 7 --- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index c6ee735..f7a78ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -213,8 +213,7 @@ class CacheManager extends Logging { cd.cachedRepresentation.cacheBuilder.clearCache() val plan = spark.sessionState.executePlan(cd.plan).executedPlan val newCache = InMemoryRelation( -cacheBuilder = cd.cachedRepresentation - .cacheBuilder.copy(cachedPlan = plan)(_cachedColumnBuffers = null), +cacheBuilder = cd.cachedRepresentation.cacheBuilder.copy(cachedPlan = plan), logicalPlan = cd.plan) val recomputedPlan = 
cd.copy(cachedRepresentation = newCache) writeLock { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index bc6e958..7180853 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -48,8 +48,9 @@ case class CachedRDDBuilder( batchSize: Int, storageLevel: StorageLevel, @transient cachedPlan: SparkPlan, -tableName: Option[String])( -@transient @volatile private var _cachedColumnBuffers: RDD[CachedBatch] = null) { +tableName: Option[String]) { + + @transient @volatile private var _cachedColumnBuffers: RDD[CachedBatch] = null val sizeInBytesStats: LongAccumulator = cachedPlan.sqlContext.sparkContext.longAccumulator @@ -143,7 +144,7 @@ object InMemoryRelation { child: SparkPlan, tableName: Option[String], logicalPlan: LogicalPlan): InMemoryRelation = { -val cacheBuilder = CachedRDDBuilder(useCompression, batchSize, storageLevel, child, tableName)() +val cacheBuilder = CachedRDDBuilder(useCompression, batchSize, storageLevel, child, tableName) new InMemoryRelation(child.output, cacheBuilder, logicalPlan.outputOrdering)( statsOfPlanToCache = logicalPlan.stats) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26851][SQL][FOLLOWUP] Fix cachedColumnBuffers field for Scala 2.11 build
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 95bb012 [SPARK-26851][SQL][FOLLOWUP] Fix cachedColumnBuffers field for Scala 2.11 build 95bb012 is described below commit 95bb01282cc94f95bbc69aafcbc1550b137238be Author: Sean Owen AuthorDate: Fri Feb 22 15:22:52 2019 +0900 [SPARK-26851][SQL][FOLLOWUP] Fix cachedColumnBuffers field for Scala 2.11 build ## What changes were proposed in this pull request? Per https://github.com/apache/spark/pull/23768/files#r259083019 the last change to this line here caused the 2.11 build to fail. It's worked around by making `_cachedColumnBuffers` a field, as it was never set by callers to anything other than its default of null. ## How was this patch tested? Existing tests. Closes #23864 from srowen/SPARK-26851.2. Authored-by: Sean Owen Signed-off-by: Takeshi Yamamuro --- .../main/scala/org/apache/spark/sql/execution/CacheManager.scala | 3 +-- .../org/apache/spark/sql/execution/columnar/InMemoryRelation.scala | 7 --- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index c6ee735..f7a78ea 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -213,8 +213,7 @@ class CacheManager extends Logging { cd.cachedRepresentation.cacheBuilder.clearCache() val plan = spark.sessionState.executePlan(cd.plan).executedPlan val newCache = InMemoryRelation( -cacheBuilder = cd.cachedRepresentation - .cacheBuilder.copy(cachedPlan = plan)(_cachedColumnBuffers = null), +cacheBuilder = cd.cachedRepresentation.cacheBuilder.copy(cachedPlan = plan), logicalPlan = cd.plan) val recomputedPlan = 
cd.copy(cachedRepresentation = newCache) writeLock { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index bc6e958..7180853 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -48,8 +48,9 @@ case class CachedRDDBuilder( batchSize: Int, storageLevel: StorageLevel, @transient cachedPlan: SparkPlan, -tableName: Option[String])( -@transient @volatile private var _cachedColumnBuffers: RDD[CachedBatch] = null) { +tableName: Option[String]) { + + @transient @volatile private var _cachedColumnBuffers: RDD[CachedBatch] = null val sizeInBytesStats: LongAccumulator = cachedPlan.sqlContext.sparkContext.longAccumulator @@ -143,7 +144,7 @@ object InMemoryRelation { child: SparkPlan, tableName: Option[String], logicalPlan: LogicalPlan): InMemoryRelation = { -val cacheBuilder = CachedRDDBuilder(useCompression, batchSize, storageLevel, child, tableName)() +val cacheBuilder = CachedRDDBuilder(useCompression, batchSize, storageLevel, child, tableName) new InMemoryRelation(child.output, cacheBuilder, logicalPlan.outputOrdering)( statsOfPlanToCache = logicalPlan.stats) } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26215][SQL] Define reserved/non-reserved keywords based on the ANSI SQL standard
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 967e4cb [SPARK-26215][SQL] Define reserved/non-reserved keywords based on the ANSI SQL standard 967e4cb is described below commit 967e4cb0112e2dd94bc75251c23bb9e854ee97a0 Author: Takeshi Yamamuro AuthorDate: Sat Feb 23 08:38:47 2019 +0900 [SPARK-26215][SQL] Define reserved/non-reserved keywords based on the ANSI SQL standard ## What changes were proposed in this pull request? This pr targeted to define reserved/non-reserved keywords for Spark SQL based on the ANSI SQL standards and the other database-like systems (e.g., PostgreSQL). We assume that they basically follow the ANSI SQL-2011 standard, but they differ slightly from each other. Therefore, this pr documented all the keywords in `docs/sql-reserved-and-non-reserved-key-words.md`. NOTE: This pr only added a small set of keywords as reserved ones and these keywords are reserved in all the ANSI SQL standards (SQL-92, SQL-99, SQL-2003, SQL-2008, SQL-2011, and SQL-2016) and PostgreSQL. This is because there is room to discuss which keyword should be reserved or not, e.g., interval units (day, hour, minute, second, ...) are reserved in the ANSI SQL standards though, they are not reserved in PostgreSQL. Therefore, we need more research about the other database-lik [...] References: - The reserved/non-reserved SQL keywords in the ANSI SQL standards: https://developer.mimer.com/wp-content/uploads/2018/05/Standard-SQL-Reserved-Words-Summary.pdf - SQL Key Words in PostgreSQL: https://www.postgresql.org/docs/current/sql-keywords-appendix.html ## How was this patch tested? Added tests in `TableIdentifierParserSuite`. Closes #23259 from maropu/SPARK-26215-WIP. 
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- docs/_data/menu-sql.yaml | 2 + docs/sql-reserved-and-non-reserved-keywords.md | 574 + .../apache/spark/sql/catalyst/parser/SqlBase.g4| 127 +++-- .../spark/sql/catalyst/parser/ParseDriver.scala| 2 + .../org/apache/spark/sql/internal/SQLConf.scala| 8 + 5 files changed, 675 insertions(+), 38 deletions(-) diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index cd065ea..9bbb115 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -70,6 +70,8 @@ url: sql-migration-guide-upgrade.html - text: Compatibility with Apache Hive url: sql-migration-guide-hive-compatibility.html +- text: SQL Reserved/Non-Reserved Keywords + url: sql-reserved-and-non-reserved-keywords.html - text: Reference url: sql-reference.html subitems: diff --git a/docs/sql-reserved-and-non-reserved-keywords.md b/docs/sql-reserved-and-non-reserved-keywords.md new file mode 100644 index 000..321fb3f --- /dev/null +++ b/docs/sql-reserved-and-non-reserved-keywords.md @@ -0,0 +1,574 @@ +--- +layout: global +title: SQL Reserved/Non-Reserved Keywords +displayTitle: SQL Reserved/Non-Reserved Keywords +--- + +In Spark SQL, there are 2 kinds of keywords: non-reserved and reserved. Non-reserved keywords have a +special meaning only in particular contexts and can be used as identifiers (e.g., table names, view names, +column names, column aliases, table aliases) in other contexts. Reserved keywords can't be used as +table alias, but can be used as other identifiers. + +The list of reserved and non-reserved keywords can change according to the config +`spark.sql.parser.ansi.enabled`, which is false by default. 
+ + + KeywordSpark SQLSQL-2011 + ANSI modedefault mode + ABSnon-reservednon-reservedreserved + ABSOLUTEnon-reservednon-reservednon-reserved + ACOSnon-reservednon-reservednon-reserved + ACTIONnon-reservednon-reservednon-reserved + ADDnon-reservednon-reservednon-reserved + AFTERnon-reservednon-reservednon-reserved + ALLreservednon-reservedreserved + ALLOCATEnon-reservednon-reservedreserved + ALTERnon-reservednon-reservedreserved + ANALYZEnon-reservednon-reservednon-reserved + ANDreservednon-reservedreserved + ANTIreservedreservednon-reserved + ANYreservednon-reservedreserved + AREnon-reservednon-reservedreserved + ARCHIVEnon-reservednon-reservednon-reserved + ARRAYnon-reservednon-reservedreserved + ARRAY_AGGnon-reservednon-reservedreserved + ARRAY_MAX_CARDINALITYnon-reservednon-reservedreserved + ASreservednon-reservedreserved + ASCnon-reservednon-reservednon-reserved + ASENSITIVEnon-reservednon-reservedreserved + ASINnon-reservednon-reservedreserved + ASSERTIONnon-reservednon-reservednon-reserved + ASYMMETRICnon-reservednon-reservedreserved + ATnon-reservednon-reservedreserved + ATANnon-reservednon-reservednon-reserved + ATOMICnon
[spark] branch master updated: [SPARK-26215][SQL] Define reserved/non-reserved keywords based on the ANSI SQL standard
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 967e4cb [SPARK-26215][SQL] Define reserved/non-reserved keywords based on the ANSI SQL standard 967e4cb is described below commit 967e4cb0112e2dd94bc75251c23bb9e854ee97a0 Author: Takeshi Yamamuro AuthorDate: Sat Feb 23 08:38:47 2019 +0900 [SPARK-26215][SQL] Define reserved/non-reserved keywords based on the ANSI SQL standard ## What changes were proposed in this pull request? This pr targeted to define reserved/non-reserved keywords for Spark SQL based on the ANSI SQL standards and the other database-like systems (e.g., PostgreSQL). We assume that they basically follow the ANSI SQL-2011 standard, but they differ slightly from each other. Therefore, this pr documented all the keywords in `docs/sql-reserved-and-non-reserved-key-words.md`. NOTE: This pr only added a small set of keywords as reserved ones and these keywords are reserved in all the ANSI SQL standards (SQL-92, SQL-99, SQL-2003, SQL-2008, SQL-2011, and SQL-2016) and PostgreSQL. This is because there is room to discuss which keyword should be reserved or not, e.g., interval units (day, hour, minute, second, ...) are reserved in the ANSI SQL standards though, they are not reserved in PostgreSQL. Therefore, we need more research about the other database-lik [...] References: - The reserved/non-reserved SQL keywords in the ANSI SQL standards: https://developer.mimer.com/wp-content/uploads/2018/05/Standard-SQL-Reserved-Words-Summary.pdf - SQL Key Words in PostgreSQL: https://www.postgresql.org/docs/current/sql-keywords-appendix.html ## How was this patch tested? Added tests in `TableIdentifierParserSuite`. Closes #23259 from maropu/SPARK-26215-WIP. 
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- docs/_data/menu-sql.yaml | 2 + docs/sql-reserved-and-non-reserved-keywords.md | 574 + .../apache/spark/sql/catalyst/parser/SqlBase.g4| 127 +++-- .../spark/sql/catalyst/parser/ParseDriver.scala| 2 + .../org/apache/spark/sql/internal/SQLConf.scala| 8 + 5 files changed, 675 insertions(+), 38 deletions(-) diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index cd065ea..9bbb115 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -70,6 +70,8 @@ url: sql-migration-guide-upgrade.html - text: Compatibility with Apache Hive url: sql-migration-guide-hive-compatibility.html +- text: SQL Reserved/Non-Reserved Keywords + url: sql-reserved-and-non-reserved-keywords.html - text: Reference url: sql-reference.html subitems: diff --git a/docs/sql-reserved-and-non-reserved-keywords.md b/docs/sql-reserved-and-non-reserved-keywords.md new file mode 100644 index 000..321fb3f --- /dev/null +++ b/docs/sql-reserved-and-non-reserved-keywords.md @@ -0,0 +1,574 @@ +--- +layout: global +title: SQL Reserved/Non-Reserved Keywords +displayTitle: SQL Reserved/Non-Reserved Keywords +--- + +In Spark SQL, there are 2 kinds of keywords: non-reserved and reserved. Non-reserved keywords have a +special meaning only in particular contexts and can be used as identifiers (e.g., table names, view names, +column names, column aliases, table aliases) in other contexts. Reserved keywords can't be used as +table alias, but can be used as other identifiers. + +The list of reserved and non-reserved keywords can change according to the config +`spark.sql.parser.ansi.enabled`, which is false by default. 
+ + + KeywordSpark SQLSQL-2011 + ANSI modedefault mode + ABSnon-reservednon-reservedreserved + ABSOLUTEnon-reservednon-reservednon-reserved + ACOSnon-reservednon-reservednon-reserved + ACTIONnon-reservednon-reservednon-reserved + ADDnon-reservednon-reservednon-reserved + AFTERnon-reservednon-reservednon-reserved + ALLreservednon-reservedreserved + ALLOCATEnon-reservednon-reservedreserved + ALTERnon-reservednon-reservedreserved + ANALYZEnon-reservednon-reservednon-reserved + ANDreservednon-reservedreserved + ANTIreservedreservednon-reserved + ANYreservednon-reservedreserved + AREnon-reservednon-reservedreserved + ARCHIVEnon-reservednon-reservednon-reserved + ARRAYnon-reservednon-reservedreserved + ARRAY_AGGnon-reservednon-reservedreserved + ARRAY_MAX_CARDINALITYnon-reservednon-reservedreserved + ASreservednon-reservedreserved + ASCnon-reservednon-reservednon-reserved + ASENSITIVEnon-reservednon-reservedreserved + ASINnon-reservednon-reservedreserved + ASSERTIONnon-reservednon-reservednon-reserved + ASYMMETRICnon-reservednon-reservedreserved + ATnon-reservednon-reservedreserved + ATANnon-reservednon-reservednon-reserved + ATOMICnon
[spark] branch master updated: [SPARK-22000][SQL][FOLLOW-UP] Fix bad test to ensure it can test properly
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 4adbcdc [SPARK-22000][SQL][FOLLOW-UP] Fix bad test to ensure it can test properly 4adbcdc is described below commit 4adbcdc42478b61fa02047d54f3d3705d5b1ecc7 Author: Jungtaek Lim (HeartSaVioR) AuthorDate: Sun Mar 17 08:25:40 2019 +0900 [SPARK-22000][SQL][FOLLOW-UP] Fix bad test to ensure it can test properly ## What changes were proposed in this pull request? There was a mistake in the test code: it had a wrong assertion. The patch proposes fixing it, as well as fixing other issues to make the test really pass. ## How was this patch tested? Fixed unit test. Closes #24112 from HeartSaVioR/SPARK-22000-hotfix. Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Takeshi Yamamuro --- .../java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java b/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java index f59afef..c5f3867 100644 --- a/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java +++ b/sql/core/src/test/java/test/org/apache/spark/sql/JavaBeanDeserializationSuite.java @@ -154,7 +154,7 @@ public class JavaBeanDeserializationSuite implements Serializable { List records = dataset.collectAsList(); -Assert.assertEquals(records, records); +Assert.assertEquals(expectedRecords, records); } @Test @@ -211,7 +211,7 @@ public class JavaBeanDeserializationSuite implements Serializable { record.setDoubleField(String.valueOf(recordRow.getDouble(4))); record.setStringField(recordRow.getString(5)); record.setBooleanField(String.valueOf(recordRow.getBoolean(6))); - 
record.setTimestampField(String.valueOf(recordRow.getTimestamp(7).getTime() * 1000)); +record.setTimestampField(String.valueOf(recordRow.getTimestamp(7))); // This would figure out that null value will not become "null". record.setNullIntField(null); return record; - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-27161][SQL] improve the document of SQL keywords
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new dbcb479 [SPARK-27161][SQL] improve the document of SQL keywords dbcb479 is described below commit dbcb4792f2396a31ab620210c6a8177c3b5db10a Author: Wenchen Fan AuthorDate: Mon Mar 18 15:19:52 2019 +0900 [SPARK-27161][SQL] improve the document of SQL keywords ## What changes were proposed in this pull request? Make it more clear about how Spark categories keywords regarding to the config `spark.sql.parser.ansi.enabled` ## How was this patch tested? existing tests Closes #24093 from cloud-fan/parser. Authored-by: Wenchen Fan Signed-off-by: Takeshi Yamamuro --- ...nd-non-reserved-keywords.md => sql-keywords.md} | 48 +++--- .../apache/spark/sql/catalyst/parser/SqlBase.g4| 75 ++ 2 files changed, 74 insertions(+), 49 deletions(-) diff --git a/docs/sql-reserved-and-non-reserved-keywords.md b/docs/sql-keywords.md similarity index 95% rename from docs/sql-reserved-and-non-reserved-keywords.md rename to docs/sql-keywords.md index b1561fb..5ba3ad8 100644 --- a/docs/sql-reserved-and-non-reserved-keywords.md +++ b/docs/sql-keywords.md @@ -1,16 +1,20 @@ --- layout: global -title: SQL Reserved/Non-Reserved Keywords -displayTitle: SQL Reserved/Non-Reserved Keywords +title: Spark SQL Keywords +displayTitle: Spark SQL Keywords --- -In Spark SQL, there are 2 kinds of keywords: non-reserved and reserved. Non-reserved keywords have a -special meaning only in particular contexts and can be used as identifiers (e.g., table names, view names, -column names, column aliases, table aliases) in other contexts. Reserved keywords can't be used as -table alias, but can be used as other identifiers. 
+When `spark.sql.parser.ansi.enabled` is true, Spark SQL has two kinds of keywords: +* Reserved keywords: Keywords that are reserved and can't be used as identifiers for table, view, column, function, alias, etc. +* Non-reserved keywords: Keywords that have a special meaning only in particular contexts and can be used as identifiers in other contexts. For example, `SELECT 1 WEEK` is an interval literal, but WEEK can be used as an identifier in other places. -The list of reserved and non-reserved keywords can change according to the config -`spark.sql.parser.ansi.enabled`, which is false by default. +When `spark.sql.parser.ansi.enabled` is false, Spark SQL has two kinds of keywords: +* Non-reserved keywords: Same definition as the one when `spark.sql.parser.ansi.enabled=true`. +* Strict-non-reserved keywords: A strict version of non-reserved keywords, which cannot be used as a table alias. + +By default `spark.sql.parser.ansi.enabled` is false. + +Below is a list of all the keywords in Spark SQL. 
KeywordSpark SQLSQL-2011 @@ -26,7 +30,7 @@ The list of reserved and non-reserved keywords can change according to the confi ALTERnon-reservednon-reservedreserved ANALYZEnon-reservednon-reservednon-reserved ANDreservednon-reservedreserved - ANTIreservedreservednon-reserved + ANTIreservedstrict-non-reservednon-reserved ANYreservednon-reservedreserved AREnon-reservednon-reservedreserved ARCHIVEnon-reservednon-reservednon-reserved @@ -116,7 +120,7 @@ The list of reserved and non-reserved keywords can change according to the confi COVAR_POPnon-reservednon-reservedreserved COVAR_SAMPnon-reservednon-reservedreserved CREATEreservednon-reservedreserved - CROSSreservedreservedreserved + CROSSreservedstrict-non-reservedreserved CUBEnon-reservednon-reservedreserved CUME_DISTnon-reservednon-reservedreserved CURRENTnon-reservednon-reservedreserved @@ -185,7 +189,7 @@ The list of reserved and non-reserved keywords can change according to the confi ESCAPEnon-reservednon-reservedreserved ESCAPEDnon-reservednon-reservednon-reserved EVERYnon-reservednon-reservedreserved - EXCEPTreservedreservedreserved + EXCEPTreservedstrict-non-reservedreserved EXCEPTIONnon-reservednon-reservednon-reserved EXCHANGEnon-reservednon-reservednon-reserved EXECnon-reservednon-reservedreserved @@ -215,7 +219,7 @@ The list of reserved and non-reserved keywords can change according to the confi FRAME_ROWnon-reservednon-reservedreserved FREEnon-reservednon-reservedreserved FROMreservednon-reservedreserved - FULLreservedreservedreserved + FULLreservedstrict-non-reservedreserved FUNCTIONnon-reservednon-reservedreserved FUNCTIONSnon-reservednon-reservednon-reserved FUSIONnon-reservednon-reservednon-reserved @@ -244,7 +248,7 @@ The list of reserved and non-reserved keywords can change according to the confi INDEXESnon-reservednon-reservednon-reserved INITIALnon-reservednon-reservednon-reserved INITIALLY
[spark] branch master updated: [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 1e9469b [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on 1e9469b is described below commit 1e9469bb7a71b06d610edaaebca933f4219a6eb3 Author: Takeshi Yamamuro AuthorDate: Wed Mar 13 11:20:27 2019 +0900 [SPARK-26976][SQL] Forbid reserved keywords as identifiers when ANSI mode is on ## What changes were proposed in this pull request? This pr added code to forbid reserved keywords as identifiers when ANSI mode is on. This is a follow-up of SPARK-26215(#23259). ## How was this patch tested? Added tests in `TableIdentifierParserSuite`. Closes #23880 from maropu/SPARK-26976. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/catalyst/parser/SqlBase.g4| 84 --- .../parser/TableIdentifierParserSuite.scala| 650 - 2 files changed, 649 insertions(+), 85 deletions(-) diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index c61cda8..d11c28c 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -736,7 +736,6 @@ qualifiedName identifier : strictIdentifier -| {ansi}? ansiReserved | {!ansi}? defaultReserved ; @@ -761,89 +760,6 @@ number | MINUS? BIGDECIMAL_LITERAL #bigDecimalLiteral ; -// NOTE: You must follow a rule below when you add a new ANTLR token in this file: -// - All the ANTLR tokens = UNION(`ansiReserved`, `ansiNonReserved`) = UNION(`defaultReserved`, `nonReserved`) -// -// Let's say you add a new token `NEWTOKEN` and this is not reserved regardless of a `spark.sql.parser.ansi.enabled` -// value. 
In this case, you must add a token `NEWTOKEN` in both `ansiNonReserved` and `nonReserved`. -// -// It is recommended to list them in alphabetical order. - -// The list of the reserved keywords when `spark.sql.parser.ansi.enabled` is true. Currently, we only reserve -// the ANSI keywords that almost all the ANSI SQL standards (SQL-92, SQL-99, SQL-2003, SQL-2008, SQL-2011, -// and SQL-2016) and PostgreSQL reserve. -ansiReserved -: ALL -| AND -| ANTI -| ANY -| AS -| AUTHORIZATION -| BOTH -| CASE -| CAST -| CHECK -| COLLATE -| COLUMN -| CONSTRAINT -| CREATE -| CROSS -| CURRENT_DATE -| CURRENT_TIME -| CURRENT_TIMESTAMP -| CURRENT_USER -| DISTINCT -| ELSE -| END -| EXCEPT -| FALSE -| FETCH -| FOR -| FOREIGN -| FROM -| FULL -| GRANT -| GROUP -| HAVING -| IN -| INNER -| INTERSECT -| INTO -| IS -| JOIN -| LEADING -| LEFT -| NATURAL -| NOT -| NULL -| ON -| ONLY -| OR -| ORDER -| OUTER -| OVERLAPS -| PRIMARY -| REFERENCES -| RIGHT -| SELECT -| SEMI -| SESSION_USER -| SETMINUS -| SOME -| TABLE -| THEN -| TO -| TRAILING -| UNION -| UNIQUE -| USER -| USING -| WHEN -| WHERE -| WITH -; - - // The list of the non-reserved keywords when `spark.sql.parser.ansi.enabled` is true. 
ansiNonReserved : ADD diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala index 3d41c27..2725deb 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/parser/TableIdentifierParserSuite.scala @@ -18,8 +18,10 @@ package org.apache.spark.sql.catalyst.parser import org.apache.spark.SparkFunSuite import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.plans.SQLHelper +import org.apache.spark.sql.internal.SQLConf -class TableIdentifierParserSuite extends SparkFunSuite { +class TableIdentifierParserSuite extends SparkFunSuite with SQLHelper { import CatalystSqlParser._ // Add "$elem$", "$value$" & "$key$" @@ -281,6 +283,635 @@ class TableIdentifierParserSuite extends SparkFunSuite { "where", "with") + // All the keywords in `docs/sql-reserved-and-non-reserved-key-words.md` are listed below: + val allCandidateKeywords = Set( +"abs", +"absolute", +"acos", +"action", +"add", +"after", +"all",
[spark] branch master updated: [MINOR][SQL] Throw better exception for Encoder with tuple more than 22 elements
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f0bde69 [MINOR][SQL] Throw better exception for Encoder with tuple more than 22 elements f0bde69 is described below commit f0bde69ebc086df398405e3836dd8958c725ab7f Author: Jungtaek Lim (HeartSaVioR) AuthorDate: Mon Mar 11 13:44:45 2019 +0900 [MINOR][SQL] Throw better exception for Encoder with tuple more than 22 elements ## What changes were proposed in this pull request? This patch proposes to throw a better exception with a clearer error message when encoding to a tuple with more than 22 elements. **BEFORE** ```scala scala> import org.apache.spark.sql.catalyst.encoders._ scala> val encoders = (0 to 22).map(_ => org.apache.spark.sql.Encoders.scalaInt.asInstanceOf[ExpressionEncoder[_]]) scala> ExpressionEncoder.tuple(encoders) java.lang.ClassNotFoundException: scala.Tuple23 ``` **AFTER** ```scala scala> ExpressionEncoder.tuple(encoders) java.lang.UnsupportedOperationException: Due to Scala's limited support of tuple, tuple with more than 22 elements are not supported. ``` ## How was this patch tested? Added UT. Closes #24046 from HeartSaVioR/MINOR-throw-better-exception-for-tuple-more-than-22. 
Authored-by: Jungtaek Lim (HeartSaVioR) Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala | 6 +- .../spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala | 9 + 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index bcc4896..2799720 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -80,7 +80,11 @@ object ExpressionEncoder { * name/positional binding is preserved. */ def tuple(encoders: Seq[ExpressionEncoder[_]]): ExpressionEncoder[_] = { -// TODO: check if encoders length is more than 22 and throw exception for it. +if (encoders.length > 22) { + throw new UnsupportedOperationException("Due to Scala's limited support of tuple, " + +"tuple with more than 22 elements are not supported.") +} + encoders.foreach(_.assertUnresolved()) val cls = Utils.getContextOrSparkClassLoader.loadClass(s"scala.Tuple${encoders.size}") diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index 1b00506..86e43d7 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -370,6 +370,15 @@ class ExpressionEncoderSuite extends CodegenInterpretedPlanTest with AnalysisTes assert(e.getMessage.contains("Cannot use null as map key")) } + test("throw exception for tuples with more than 22 elements") { +val encoders = (0 to 22).map(_ => Encoders.scalaInt.asInstanceOf[ExpressionEncoder[_]]) + +val e = 
intercept[UnsupportedOperationException] { + ExpressionEncoder.tuple(encoders) +} +assert(e.getMessage.contains("tuple with more than 22 elements are not supported")) + } + private def encodeDecodeTest[T : ExpressionEncoder]( input: T, testName: String): Unit = { - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [SPARK-25863][SPARK-21871][SQL] Check if code size statistics is empty or not in updateAndGetCompilationStats
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new f7ad4ff [SPARK-25863][SPARK-21871][SQL] Check if code size statistics is empty or not in updateAndGetCompilationStats f7ad4ff is described below commit f7ad4ff040d39c7a55a9e01a990534e55c8178a5 Author: Takeshi Yamamuro AuthorDate: Thu Mar 7 17:25:22 2019 +0900 [SPARK-25863][SPARK-21871][SQL] Check if code size statistics is empty or not in updateAndGetCompilationStats ## What changes were proposed in this pull request? `CodeGenerator.updateAndGetCompilationStats` throws an unsupported exception for empty code size statistics. This pr added code to check if it is empty or not. ## How was this patch tested? Pass Jenkins. Closes #23947 from maropu/SPARK-21871-FOLLOWUP. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d5857e0..812877d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1351,7 +1351,11 @@ object CodeGenerator extends Logging { } }.flatten -codeSizes.max +if (codeSizes.nonEmpty) { + codeSizes.max +} else { + 0 +} } /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.3 updated: [SPARK-25863][SPARK-21871][SQL] Check if code size statistics is empty or not in updateAndGetCompilationStats
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new dfde0c6 [SPARK-25863][SPARK-21871][SQL] Check if code size statistics is empty or not in updateAndGetCompilationStats dfde0c6 is described below commit dfde0c6501637cce4704ee0edd146a73f9119305 Author: Takeshi Yamamuro AuthorDate: Thu Mar 7 17:25:22 2019 +0900 [SPARK-25863][SPARK-21871][SQL] Check if code size statistics is empty or not in updateAndGetCompilationStats ## What changes were proposed in this pull request? `CodeGenerator.updateAndGetCompilationStats` throws an unsupported exception for empty code size statistics. This pr added code to check if it is empty or not. ## How was this patch tested? Pass Jenkins. Closes #23947 from maropu/SPARK-21871-FOLLOWUP. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/expressions/codegen/CodeGenerator.scala | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9cf5839..d9eb142 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -1472,7 +1472,11 @@ object CodeGenerator extends Logging { } }.flatten -codeSizes.max +if (codeSizes.nonEmpty) { + codeSizes.max +} else { + 0 +} } /** - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r32501 - in /dev/spark: v2.3.3-rc1-bin/ v2.3.3-rc1-docs/
Author: yamamuro Date: Fri Feb 15 05:37:52 2019 New Revision: 32501 Log: Removing RC artifacts. Removed: dev/spark/v2.3.3-rc1-bin/ dev/spark/v2.3.3-rc1-docs/ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] tag v2.3.3 created (now 66fd9c3)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to tag v2.3.3 in repository https://gitbox.apache.org/repos/asf/spark.git. at 66fd9c3 (commit) No new revisions were added by this update. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26897][SQL][TEST] Update Spark 2.3.x testing from HiveExternalCatalogVersionsSuite
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new dcdbd06 [SPARK-26897][SQL][TEST] Update Spark 2.3.x testing from HiveExternalCatalogVersionsSuite dcdbd06 is described below commit dcdbd06b687fafbf29df504949db0a5f77608c8e Author: Takeshi Yamamuro AuthorDate: Mon Feb 18 08:05:49 2019 +0900 [SPARK-26897][SQL][TEST] Update Spark 2.3.x testing from HiveExternalCatalogVersionsSuite ## What changes were proposed in this pull request? The maintenance release of `branch-2.3` (v2.3.3) vote passed, so this issue updates PROCESS_TABLES.testingVersions in HiveExternalCatalogVersionsSuite ## How was this patch tested? Pass the Jenkins. Closes #23807 from maropu/SPARK-26897. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- .../org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index dd0e1bd..8086f75 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -206,7 +206,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { object PROCESS_TABLES extends QueryTest with SQLTestUtils { // Tests the latest version of every release line. - val testingVersions = Seq("2.3.2", "2.4.0") + val testingVersions = Seq("2.3.3", "2.4.0") protected var spark: SparkSession = _ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [SPARK-26897][SQL][TEST] Update Spark 2.3.x testing from HiveExternalCatalogVersionsSuite
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new dfda97a [SPARK-26897][SQL][TEST] Update Spark 2.3.x testing from HiveExternalCatalogVersionsSuite dfda97a is described below commit dfda97a29f1211384503343d27afd752cc98f578 Author: Takeshi Yamamuro AuthorDate: Mon Feb 18 08:05:49 2019 +0900 [SPARK-26897][SQL][TEST] Update Spark 2.3.x testing from HiveExternalCatalogVersionsSuite ## What changes were proposed in this pull request? The maintenance release of `branch-2.3` (v2.3.3) vote passed, so this issue updates PROCESS_TABLES.testingVersions in HiveExternalCatalogVersionsSuite ## How was this patch tested? Pass the Jenkins. Closes #23807 from maropu/SPARK-26897. Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro (cherry picked from commit dcdbd06b687fafbf29df504949db0a5f77608c8e) Signed-off-by: Takeshi Yamamuro --- .../org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala index 632a21a..598b08b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveExternalCatalogVersionsSuite.scala @@ -203,7 +203,7 @@ class HiveExternalCatalogVersionsSuite extends SparkSubmitTestUtils { object PROCESS_TABLES extends QueryTest with SQLTestUtils { // Tests the latest version of every release line. 
- val testingVersions = Seq("2.3.2", "2.4.0") + val testingVersions = Seq("2.3.3", "2.4.0") protected var spark: SparkSession = _ - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26798][SQL] HandleNullInputsForUDF should trust nullability
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new f502e20 [SPARK-26798][SQL] HandleNullInputsForUDF should trust nullability f502e20 is described below commit f502e209f49d3d76f947d1a8ba38c8c8a86e0bef Author: Wenchen Fan AuthorDate: Wed Feb 13 14:07:03 2019 +0900 [SPARK-26798][SQL] HandleNullInputsForUDF should trust nullability ## What changes were proposed in this pull request? There is a very old TODO in `HandleNullInputsForUDF`, saying that we can skip the null check if input is not nullable. We leverage the nullability info at many places, we can trust it here too. ## How was this patch tested? re-enable an ignored test Closes #23712 from cloud-fan/minor. Lead-authored-by: Wenchen Fan Co-authored-by: Xiao Li Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/analysis/Analyzer.scala | 47 - .../spark/sql/catalyst/expressions/ScalaUDF.scala | 9 ++-- .../sql/catalyst/analysis/AnalysisSuite.scala | 36 +--- .../sql/catalyst/expressions/ScalaUDFSuite.scala | 6 +-- .../spark/sql/catalyst/trees/TreeNodeSuite.scala | 2 +- .../org/apache/spark/sql/UDFRegistration.scala | 48 +++--- .../datasources/FileFormatDataWriter.scala | 2 +- .../sql/expressions/UserDefinedFunction.scala | 8 ++-- .../test/scala/org/apache/spark/sql/UDFSuite.scala | 37 - 9 files changed, 114 insertions(+), 81 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a84bb76..793c337 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2147,27 +2147,36 @@ class Analyzer( case p => p transformExpressionsUp { -case udf @ ScalaUDF(_, _, 
inputs, inputsNullSafe, _, _, _, _) -if inputsNullSafe.contains(false) => +case udf @ ScalaUDF(_, _, inputs, inputPrimitives, _, _, _, _) +if inputPrimitives.contains(true) => // Otherwise, add special handling of null for fields that can't accept null. // The result of operations like this, when passed null, is generally to return null. - assert(inputsNullSafe.length == inputs.length) - - // TODO: skip null handling for not-nullable primitive inputs after we can completely - // trust the `nullable` information. - val inputsNullCheck = inputsNullSafe.zip(inputs) -.filter { case (nullSafe, _) => !nullSafe } -.map { case (_, expr) => IsNull(expr) } -.reduceLeftOption[Expression]((e1, e2) => Or(e1, e2)) - // Once we add an `If` check above the udf, it is safe to mark those checked inputs - // as null-safe (i.e., set `inputsNullSafe` all `true`), because the null-returning - // branch of `If` will be called if any of these checked inputs is null. Thus we can - // prevent this rule from being applied repeatedly. - val newInputsNullSafe = inputsNullSafe.map(_ => true) - inputsNullCheck -.map(If(_, Literal.create(null, udf.dataType), - udf.copy(inputsNullSafe = newInputsNullSafe))) -.getOrElse(udf) + assert(inputPrimitives.length == inputs.length) + + val inputPrimitivesPair = inputPrimitives.zip(inputs) + val inputNullCheck = inputPrimitivesPair.collect { +case (isPrimitive, input) if isPrimitive && input.nullable => + IsNull(input) + }.reduceLeftOption[Expression](Or) + + if (inputNullCheck.isDefined) { +// Once we add an `If` check above the udf, it is safe to mark those checked inputs +// as null-safe (i.e., wrap with `KnownNotNull`), because the null-returning +// branch of `If` will be called if any of these checked inputs is null. Thus we can +// prevent this rule from being applied repeatedly. 
+val newInputs = inputPrimitivesPair.map { + case (isPrimitive, input) => +if (isPrimitive && input.nullable) { + KnownNotNull(input) +} else { + input +} +} +val newUDF = udf.copy(children = newInputs) +If(inputNullCheck.get, Literal.create(null, udf.dataType), newUDF) + } else { +udf + } } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressio
[spark] 02/02: [SPARK-23416][SS] Add a specific stop method for ContinuousExecution.
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 55d5a19c8e01de945c4c9e42752ed132df4b9110 Author: Jose Torres AuthorDate: Wed May 23 17:21:29 2018 -0700 [SPARK-23416][SS] Add a specific stop method for ContinuousExecution. ## What changes were proposed in this pull request? Add a specific stop method for ContinuousExecution. The previous StreamExecution.stop() method had a race condition as applied to continuous processing: if the cancellation was round-tripped to the driver too quickly, the generic SparkException it caused would be reported as the query death cause. We earlier decided that SparkException should not be added to the StreamExecution.isInterruptionException() whitelist, so we need to ensure this never happens instead. ## How was this patch tested? Existing tests. I could consistently reproduce the previous flakiness by putting Thread.sleep(1000) between the first job cancellation and thread interruption in StreamExecution.stop(). Author: Jose Torres Closes #21384 from jose-torres/fixKafka. --- .../sql/execution/streaming/MicroBatchExecution.scala | 18 ++ .../sql/execution/streaming/StreamExecution.scala | 18 -- .../streaming/continuous/ContinuousExecution.scala | 16 3 files changed, 34 insertions(+), 18 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala index 8bf1dd3..7f09bd5 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/MicroBatchExecution.scala @@ -111,6 +111,24 @@ class MicroBatchExecution( } /** + * Signals to the thread executing micro-batches that it should stop running after the next + * batch. 
This method blocks until the thread stops running. + */ + override def stop(): Unit = { +// Set the state to TERMINATED so that the batching thread knows that it was interrupted +// intentionally +state.set(TERMINATED) +if (queryExecutionThread.isAlive) { + sparkSession.sparkContext.cancelJobGroup(runId.toString) + queryExecutionThread.interrupt() + queryExecutionThread.join() + // microBatchThread may spawn new jobs, so we need to cancel again to prevent a leak + sparkSession.sparkContext.cancelJobGroup(runId.toString) +} +logInfo(s"Query $prettyIdString was stopped") + } + + /** * Repeatedly attempts to run batches as data arrives. */ protected def runActivatedStream(sparkSessionForStream: SparkSession): Unit = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 3fc8c78..290de87 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala @@ -379,24 +379,6 @@ abstract class StreamExecution( } /** - * Signals to the thread executing micro-batches that it should stop running after the next - * batch. This method blocks until the thread stops running. 
- */ - override def stop(): Unit = { -// Set the state to TERMINATED so that the batching thread knows that it was interrupted -// intentionally -state.set(TERMINATED) -if (queryExecutionThread.isAlive) { - sparkSession.sparkContext.cancelJobGroup(runId.toString) - queryExecutionThread.interrupt() - queryExecutionThread.join() - // microBatchThread may spawn new jobs, so we need to cancel again to prevent a leak - sparkSession.sparkContext.cancelJobGroup(runId.toString) -} -logInfo(s"Query $prettyIdString was stopped") - } - - /** * Blocks the current thread until processing for data from the given `source` has reached at * least the given `Offset`. This method is intended for use primarily when writing tests. */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index 11df2c2..62adedb 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -362,6 +362,22 @@ class ContinuousExecution( } } } + + /** + * Stops the query execution thread to terminate the query. + */ + overr
[spark] branch branch-2.3 updated (abce846 -> 55d5a19)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git. from abce846 [SPARK-23408][SS][BRANCH-2.3] Synchronize successive AddData actions in Streaming*JoinSuite new 7f13fd0 [SPARK-23491][SS] Remove explicit job cancellation from ContinuousExecution reconfiguring new 55d5a19 [SPARK-23416][SS] Add a specific stop method for ContinuousExecution. The 2 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: .../execution/streaming/MicroBatchExecution.scala | 18 +++ .../sql/execution/streaming/StreamExecution.scala | 18 --- .../streaming/continuous/ContinuousExecution.scala | 26 -- 3 files changed, 42 insertions(+), 20 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/02: [SPARK-23491][SS] Remove explicit job cancellation from ContinuousExecution reconfiguring
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 7f13fd0c5a79ab21c4ace2445127e6c69a7f745c Author: Jose Torres AuthorDate: Mon Feb 26 11:28:44 2018 -0800 [SPARK-23491][SS] Remove explicit job cancellation from ContinuousExecution reconfiguring ## What changes were proposed in this pull request? Remove queryExecutionThread.interrupt() from ContinuousExecution. As detailed in the JIRA, interrupting the thread is only relevant in the microbatch case; for continuous processing the query execution can quickly clean itself up without. ## How was this patch tested? existing tests Author: Jose Torres Closes #20622 from jose-torres/SPARK-23441. --- .../execution/streaming/continuous/ContinuousExecution.scala | 10 -- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala index c3294d6..11df2c2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/ContinuousExecution.scala @@ -236,9 +236,7 @@ class ContinuousExecution( startTrigger() if (reader.needsReconfiguration() && state.compareAndSet(ACTIVE, RECONFIGURING)) { - stopSources() if (queryExecutionThread.isAlive) { -sparkSession.sparkContext.cancelJobGroup(runId.toString) queryExecutionThread.interrupt() } false @@ -266,12 +264,20 @@ class ContinuousExecution( SQLExecution.withNewExecutionId( sparkSessionForQuery, lastExecution)(lastExecution.toRdd) } +} catch { + case t: Throwable + if StreamExecution.isInterruptionException(t) && state.get() == RECONFIGURING => +logInfo(s"Query $id ignoring exception from reconfiguring: $t") +// interrupted by 
reconfiguration - swallow exception so we can restart the query } finally { epochEndpoint.askSync[Unit](StopContinuousExecutionWrites) SparkEnv.get.rpcEnv.stop(epochEndpoint) epochUpdateThread.interrupt() epochUpdateThread.join() + + stopSources() + sparkSession.sparkContext.cancelJobGroup(runId.toString) } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26857][SQL] Return UnsafeArrayData for date/timestamp type in ColumnarArray.copy()
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 72a349a [SPARK-26857][SQL] Return UnsafeArrayData for date/timestamp type in ColumnarArray.copy() 72a349a is described below commit 72a349a95d7661cd442851b62e7e830f213ed05c Author: Gengliang Wang AuthorDate: Wed Feb 13 10:23:31 2019 +0900 [SPARK-26857][SQL] Return UnsafeArrayData for date/timestamp type in ColumnarArray.copy() ## What changes were proposed in this pull request? In https://github.com/apache/spark/issues/23569, the copy method of `ColumnarArray` is implemented. To further improve it, we can return `UnsafeArrayData` for `date`/`timestamp` type in `ColumnarArray.copy()`. ## How was this patch tested? Unit test Closes #23761 from gengliangwang/copyDateAndTS. Authored-by: Gengliang Wang Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/vectorized/ColumnarArray.java | 4 ++-- .../execution/vectorized/ColumnVectorSuite.scala | 28 ++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java index 1471627..8dc7b11 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java +++ b/sql/core/src/main/java/org/apache/spark/sql/vectorized/ColumnarArray.java @@ -56,9 +56,9 @@ public final class ColumnarArray extends ArrayData { return UnsafeArrayData.fromPrimitiveArray(toByteArray()); } else if (dt instanceof ShortType) { return UnsafeArrayData.fromPrimitiveArray(toShortArray()); -} else if (dt instanceof IntegerType) { +} else if (dt instanceof IntegerType || dt instanceof DateType) { return UnsafeArrayData.fromPrimitiveArray(toIntArray()); -} else if (dt instanceof LongType) { +} else if (dt instanceof LongType || dt instanceof 
TimestampType) { return UnsafeArrayData.fromPrimitiveArray(toLongArray()); } else if (dt instanceof FloatType) { return UnsafeArrayData.fromPrimitiveArray(toFloatArray()); diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala index 866fcb1..c2e783d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnVectorSuite.scala @@ -108,6 +108,20 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } } + testVectors("date", 10, DateType) { testVector => +(0 until 10).foreach { i => + testVector.appendInt(i) +} + +val array = new ColumnarArray(testVector, 0, 10) +val arrayCopy = array.copy() + +(0 until 10).foreach { i => + assert(array.get(i, DateType) === i) + assert(arrayCopy.get(i, DateType) === i) +} + } + testVectors("long", 10, LongType) { testVector => (0 until 10).foreach { i => testVector.appendLong(i) @@ -122,6 +136,20 @@ class ColumnVectorSuite extends SparkFunSuite with BeforeAndAfterEach { } } + testVectors("timestamp", 10, TimestampType) { testVector => +(0 until 10).foreach { i => + testVector.appendLong(i) +} + +val array = new ColumnarArray(testVector, 0, 10) +val arrayCopy = array.copy() + +(0 until 10).foreach { i => + assert(array.get(i, TimestampType) === i) + assert(arrayCopy.get(i, TimestampType) === i) +} + } + testVectors("float", 10, FloatType) { testVector => (0 until 10).foreach { i => testVector.appendFloat(i.toFloat) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-26798][SQL] HandleNullInputsForUDF should trust nullability
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 974f524 [SPARK-26798][SQL] HandleNullInputsForUDF should trust nullability 974f524 is described below commit 974f52499271848705c9eb209373f27d808fb72b Author: Wenchen Fan AuthorDate: Thu Feb 14 00:22:11 2019 +0900 [SPARK-26798][SQL] HandleNullInputsForUDF should trust nullability ## What changes were proposed in this pull request? There is a very old TODO in `HandleNullInputsForUDF`, saying that we can skip the null check if input is not nullable. We leverage the nullability info at many places, we can trust it here too. ## How was this patch tested? re-enable an ignored test Closes #23712 from cloud-fan/minor. Lead-authored-by: Wenchen Fan Co-authored-by: Xiao Li Signed-off-by: Takeshi Yamamuro --- .../spark/sql/catalyst/analysis/Analyzer.scala | 47 - .../spark/sql/catalyst/expressions/ScalaUDF.scala | 9 ++-- .../sql/catalyst/analysis/AnalysisSuite.scala | 36 +--- .../sql/catalyst/expressions/ScalaUDFSuite.scala | 6 +-- .../spark/sql/catalyst/trees/TreeNodeSuite.scala | 2 +- .../org/apache/spark/sql/UDFRegistration.scala | 48 +++--- .../datasources/FileFormatDataWriter.scala | 2 +- .../sql/expressions/UserDefinedFunction.scala | 8 ++-- .../test/scala/org/apache/spark/sql/UDFSuite.scala | 37 - 9 files changed, 114 insertions(+), 81 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index a84bb76..793c337 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -2147,27 +2147,36 @@ class Analyzer( case p => p transformExpressionsUp { -case udf @ ScalaUDF(_, _, 
inputs, inputsNullSafe, _, _, _, _) -if inputsNullSafe.contains(false) => +case udf @ ScalaUDF(_, _, inputs, inputPrimitives, _, _, _, _) +if inputPrimitives.contains(true) => // Otherwise, add special handling of null for fields that can't accept null. // The result of operations like this, when passed null, is generally to return null. - assert(inputsNullSafe.length == inputs.length) - - // TODO: skip null handling for not-nullable primitive inputs after we can completely - // trust the `nullable` information. - val inputsNullCheck = inputsNullSafe.zip(inputs) -.filter { case (nullSafe, _) => !nullSafe } -.map { case (_, expr) => IsNull(expr) } -.reduceLeftOption[Expression]((e1, e2) => Or(e1, e2)) - // Once we add an `If` check above the udf, it is safe to mark those checked inputs - // as null-safe (i.e., set `inputsNullSafe` all `true`), because the null-returning - // branch of `If` will be called if any of these checked inputs is null. Thus we can - // prevent this rule from being applied repeatedly. - val newInputsNullSafe = inputsNullSafe.map(_ => true) - inputsNullCheck -.map(If(_, Literal.create(null, udf.dataType), - udf.copy(inputsNullSafe = newInputsNullSafe))) -.getOrElse(udf) + assert(inputPrimitives.length == inputs.length) + + val inputPrimitivesPair = inputPrimitives.zip(inputs) + val inputNullCheck = inputPrimitivesPair.collect { +case (isPrimitive, input) if isPrimitive && input.nullable => + IsNull(input) + }.reduceLeftOption[Expression](Or) + + if (inputNullCheck.isDefined) { +// Once we add an `If` check above the udf, it is safe to mark those checked inputs +// as null-safe (i.e., wrap with `KnownNotNull`), because the null-returning +// branch of `If` will be called if any of these checked inputs is null. Thus we can +// prevent this rule from being applied repeatedly. 
+val newInputs = inputPrimitivesPair.map { + case (isPrimitive, input) => +if (isPrimitive && input.nullable) { + KnownNotNull(input) +} else { + input +} +} +val newUDF = udf.copy(children = newInputs) +If(inputNullCheck.get, Literal.create(null, udf.dataType), newUDF) + } else { +udf + } } } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressio
[spark] branch master updated: [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite.
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 6a2f3dc [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite. 6a2f3dc is described below commit 6a2f3dcc2bd601fd1fe7610854bc0f5bf90300f4 Author: Marcelo Vanzin AuthorDate: Thu Jan 31 00:10:23 2019 +0900 [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite. Otherwise the RDD data may be out of date by the time the test tries to check it. Tested with an artificial delay inserted in AppStatusListener. Closes #23654 from vanzin/SPARK-26732. Authored-by: Marcelo Vanzin Signed-off-by: Takeshi Yamamuro --- core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala index 8feb3de..051a13c 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala @@ -60,6 +60,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext { val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() assert(sc.getRDDStorageInfo.size === 0) rdd.collect() +sc.listenerBus.waitUntilEmpty(1) assert(sc.getRDDStorageInfo.size === 1) assert(sc.getRDDStorageInfo.head.isCached) assert(sc.getRDDStorageInfo.head.memSize > 0) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite.
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new 710d81e [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite. 710d81e is described below commit 710d81ea87b5ba0c3d49b3dfbc591129685c2a13 Author: Marcelo Vanzin AuthorDate: Thu Jan 31 00:10:23 2019 +0900 [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite. Otherwise the RDD data may be out of date by the time the test tries to check it. Tested with an artificial delay inserted in AppStatusListener. Closes #23654 from vanzin/SPARK-26732. Authored-by: Marcelo Vanzin Signed-off-by: Takeshi Yamamuro (cherry picked from commit 6a2f3dcc2bd601fd1fe7610854bc0f5bf90300f4) Signed-off-by: Takeshi Yamamuro --- core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala index 8feb3de..051a13c 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala @@ -60,6 +60,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext { val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() assert(sc.getRDDStorageInfo.size === 0) rdd.collect() +sc.listenerBus.waitUntilEmpty(1) assert(sc.getRDDStorageInfo.size === 1) assert(sc.getRDDStorageInfo.head.isCached) assert(sc.getRDDStorageInfo.head.memSize > 0) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.3 updated: [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite.
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new f6391e1 [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite. f6391e1 is described below commit f6391e165b8e4dde8a9636fc220042b38eabd056 Author: Marcelo Vanzin AuthorDate: Thu Jan 31 00:10:23 2019 +0900 [SPARK-26732][CORE][TEST] Wait for listener bus to process events in SparkContextInfoSuite. Otherwise the RDD data may be out of date by the time the test tries to check it. Tested with an artificial delay inserted in AppStatusListener. Closes #23654 from vanzin/SPARK-26732. Authored-by: Marcelo Vanzin Signed-off-by: Takeshi Yamamuro (cherry picked from commit 6a2f3dcc2bd601fd1fe7610854bc0f5bf90300f4) Signed-off-by: Takeshi Yamamuro --- core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala index 8feb3de..051a13c 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextInfoSuite.scala @@ -60,6 +60,7 @@ class SparkContextInfoSuite extends SparkFunSuite with LocalSparkContext { val rdd = sc.makeRDD(Array(1, 2, 3, 4), 2).cache() assert(sc.getRDDStorageInfo.size === 0) rdd.collect() +sc.listenerBus.waitUntilEmpty(1) assert(sc.getRDDStorageInfo.size === 1) assert(sc.getRDDStorageInfo.head.isCached) assert(sc.getRDDStorageInfo.head.memSize > 0) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r32335 - in /dev/spark/v2.3.3-rc2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _site/api/java/org/apache/spark
Author: yamamuro Date: Tue Feb 5 03:33:06 2019 New Revision: 32335 Log: Apache Spark v2.3.3-rc2 docs [This commit notification would consist of 1447 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.] - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing Spark release v2.3.3-rc2
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to tag v2.3.3-rc2 in repository https://gitbox.apache.org/repos/asf/spark.git commit 66fd9c34bf406a4b5f86605d06c9607752bd637a Author: Takeshi Yamamuro AuthorDate: Mon Feb 4 12:18:21 2019 + Preparing Spark release v2.3.3-rc2 --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index a82446e..6ec4966 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.3.4 +Version: 2.3.3 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index 612a1b8..6a8cd4f 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.4-SNAPSHOT +2.3.3 ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 5547e97..6010b6e 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.4-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 119dde2..8b5d3c8 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.4-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index dba5224..dd27a24 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.4-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 56902a3..aded5e7d 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.4-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 5302d95..a50f612 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.4-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 232ebfa..8112ca4 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.4-SNAPSHOT +2.3.3 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index f0baa2a..0d5f61f 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-pa
[spark] tag v2.3.3-rc2 created (now 66fd9c3)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to tag v2.3.3-rc2 in repository https://gitbox.apache.org/repos/asf/spark.git. at 66fd9c3 (commit) This tag includes the following new commits: new 66fd9c3 Preparing Spark release v2.3.3-rc2 The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] 01/01: Preparing development version 2.3.4-SNAPSHOT
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git commit 7845807d76c617fcb738dd9916caca80460db27e Author: Takeshi Yamamuro AuthorDate: Mon Feb 4 12:18:50 2019 + Preparing development version 2.3.4-SNAPSHOT --- R/pkg/DESCRIPTION | 2 +- assembly/pom.xml | 2 +- common/kvstore/pom.xml| 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml| 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- docs/_config.yml | 4 ++-- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml| 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml| 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml| 2 +- hadoop-cloud/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- python/pyspark/version.py | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml| 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 42 insertions(+), 42 deletions(-) diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index 6ec4966..a82446e 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.3.3 +Version: 2.3.4 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark. 
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), diff --git a/assembly/pom.xml b/assembly/pom.xml index 6a8cd4f..612a1b8 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../pom.xml diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 6010b6e..5547e97 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 8b5d3c8..119dde2 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index dd27a24..dba5224 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index aded5e7d..56902a3 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index a50f612..5302d95 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 8112ca4..232ebfa 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.3.3 +2.3.4-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 0d5f61f..f0baa2a 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache
[spark] branch branch-2.3 updated (4d6ea2c -> 7845807)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git. from 4d6ea2c [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException add 66fd9c3 Preparing Spark release v2.3.3-rc2 new 7845807 Preparing development version 2.3.4-SNAPSHOT The 1 revisions listed above as "new" are entirely new to this repository and will be described in separate emails. The revisions listed as "add" were already present in the repository and have only been added to this reference. Summary of changes: - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch branch-2.4 updated: [SPARK-26708][SQL][BRANCH-2.4] Incorrect result caused by inconsistency between a SQL cache's cached RDD and its physical plan
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.4 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.4 by this push: new d5cc890 [SPARK-26708][SQL][BRANCH-2.4] Incorrect result caused by inconsistency between a SQL cache's cached RDD and its physical plan d5cc890 is described below commit d5cc8909c72e958ce187df9c75847ad0125991ab Author: maryannxue AuthorDate: Tue Jan 29 21:33:46 2019 +0900 [SPARK-26708][SQL][BRANCH-2.4] Incorrect result caused by inconsistency between a SQL cache's cached RDD and its physical plan ## What changes were proposed in this pull request? When performing non-cascading cache invalidation, `recache` is called on the other cache entries which are dependent on the cache being invalidated. It leads to the the physical plans of those cache entries being re-compiled. For those cache entries, if the cache RDD has already been persisted, chances are there will be inconsistency between the data and the new plan. It can cause a correctness issue if the new plan's `outputPartitioning` or `outputOrdering` is different from the tha [...] The fix is to keep the cache entry as it is if the data has been loaded, otherwise re-build the cache entry, with a new plan and an empty cache buffer. ## How was this patch tested? Added UT. Closes #23678 from maryannxue/spark-26708-2.4. 
Authored-by: maryannxue Signed-off-by: Takeshi Yamamuro --- .../apache/spark/sql/execution/CacheManager.scala | 28 +++--- .../sql/execution/columnar/InMemoryRelation.scala | 10 + .../org/apache/spark/sql/DatasetCacheSuite.scala | 44 +- 3 files changed, 67 insertions(+), 15 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala index c992993..5b30596 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/CacheManager.scala @@ -166,16 +166,34 @@ class CacheManager extends Logging { val needToRecache = scala.collection.mutable.ArrayBuffer.empty[CachedData] while (it.hasNext) { val cd = it.next() - if (condition(cd.plan)) { -if (clearCache) { - cd.cachedRepresentation.cacheBuilder.clearCache() -} + // If `clearCache` is false (which means the recache request comes from a non-cascading + // cache invalidation) and the cache buffer has already been loaded, we do not need to + // re-compile a physical plan because the old plan will not be used any more by the + // CacheManager although it still lives in compiled `Dataset`s and it could still work. + // Otherwise, it means either `clearCache` is true, then we have to clear the cache buffer + // and re-compile the physical plan; or it is a non-cascading cache invalidation and cache + // buffer is still empty, then we could have a more efficient new plan by removing + // dependency on the previously removed cache entries. + // Note that the `CachedRDDBuilder`.`isCachedColumnBuffersLoaded` call is a non-locking + // status test and may not return the most accurate cache buffer state. So the worse case + // scenario can be: + // 1) The buffer has been loaded, but `isCachedColumnBuffersLoaded` returns false, then we + //will clear the buffer and build a new plan. It is inefficient but doesn't affect + //correctness. 
+ // 2) The buffer has been cleared, but `isCachedColumnBuffersLoaded` returns true, then we + //will keep it as it is. It means the physical plan has been re-compiled already in the + //other thread. + val buildNewPlan = +clearCache || !cd.cachedRepresentation.cacheBuilder.isCachedColumnBuffersLoaded + if (condition(cd.plan) && buildNewPlan) { +cd.cachedRepresentation.cacheBuilder.clearCache() // Remove the cache entry before we create a new one, so that we can have a different // physical plan. it.remove() val plan = spark.sessionState.executePlan(cd.plan).executedPlan val newCache = InMemoryRelation( - cacheBuilder = cd.cachedRepresentation.cacheBuilder.withCachedPlan(plan), + cacheBuilder = cd.cachedRepresentation +.cacheBuilder.copy(cachedPlan = plan)(_cachedColumnBuffers = null), logicalPlan = cd.plan) needToRecache += cd.copy(cachedRepresentation = newCache) } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index b752b77..8eecd7a 100644 --- a/sql/core/src/ma
[spark] branch branch-2.3 updated: [SPARK-26709][SQL][BRANCH-2.3] OptimizeMetadataOnlyQuery does not handle empty records correctly
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-2.3 in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/branch-2.3 by this push: new f98aee4 [SPARK-26709][SQL][BRANCH-2.3] OptimizeMetadataOnlyQuery does not handle empty records correctly f98aee4 is described below commit f98aee4d63ca4a51fb7d98e1d36f8a82d62cf378 Author: Gengliang Wang AuthorDate: Sat Jan 26 09:26:12 2019 +0900 [SPARK-26709][SQL][BRANCH-2.3] OptimizeMetadataOnlyQuery does not handle empty records correctly ## What changes were proposed in this pull request? When reading from empty tables, the optimization `OptimizeMetadataOnlyQuery` may return wrong results: ``` sql("CREATE TABLE t (col1 INT, p1 INT) USING PARQUET PARTITIONED BY (p1)") sql("INSERT INTO TABLE t PARTITION (p1 = 5) SELECT ID FROM range(1, 1)") sql("SELECT MAX(p1) FROM t") ``` The result is supposed to be `null`. However, with the optimization the result is `5`. The rule is originally ported from https://issues.apache.org/jira/browse/HIVE-1003 in #13494. In Hive, the rule is disabled by default in a later release(https://issues.apache.org/jira/browse/HIVE-15397), due to the same problem. It is hard to completely avoid the correctness issue. Because data sources like Parquet can be metadata-only. Spark can't tell whether it is empty or not without actually reading it. This PR disable the optimization by default. ## How was this patch tested? Unit test Closes #23648 from gengliangwang/SPARK-26709. 
Authored-by: Gengliang Wang Signed-off-by: Takeshi Yamamuro --- docs/sql-programming-guide.md | 12 --- .../org/apache/spark/sql/internal/SQLConf.scala| 6 -- .../sql/execution/OptimizeMetadataOnlyQuery.scala | 5 + .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 25 ++ .../spark/sql/hive/execution/SQLQuerySuite.scala | 17 +++ 5 files changed, 51 insertions(+), 14 deletions(-) diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md index e5fa4c6..038c1ec 100644 --- a/docs/sql-programming-guide.md +++ b/docs/sql-programming-guide.md @@ -990,18 +990,6 @@ Configuration of Parquet can be done using the `setConf` method on `SparkSession - - spark.sql.optimizer.metadataOnly - true - - - When true, enable the metadata-only query optimization that use the table's metadata to - produce the partition columns instead of table scans. It applies when all the columns scanned - are partition columns and the query has an aggregate operator that satisfies distinct - semantics. - - - ## ORC Files diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala index 731d4e3..c77c4f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala @@ -469,12 +469,14 @@ object SQLConf { .createWithDefault(HiveCaseSensitiveInferenceMode.INFER_AND_SAVE.toString) val OPTIMIZER_METADATA_ONLY = buildConf("spark.sql.optimizer.metadataOnly") +.internal() .doc("When true, enable the metadata-only query optimization that use the table's metadata " + "to produce the partition columns instead of table scans. It applies when all the columns " + "scanned are partition columns and the query has an aggregate operator that satisfies " + - "distinct semantics.") + "distinct semantics. 
By default the optimization is disabled, since it may return " + + "incorrect results when the files are empty.") .booleanConf -.createWithDefault(true) +.createWithDefault(false) val COLUMN_NAME_OF_CORRUPT_RECORD = buildConf("spark.sql.columnNameOfCorruptRecord") .doc("The name of internal column for storing raw/un-parsed JSON and CSV records that fail " + diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala index dc4aff9..fff32c8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuery.scala @@ -67,6 +67,11 @@ case class OptimizeMetadataOnlyQuery(catalog: SessionCatalog) extends Rule[Logic }) } if (isAllDistinctAgg) { +logWarning("Since configuration `spark.sql.optimizer.metadataOnly` is enabled, " + + "Spark will scan pa
svn commit: r32334 - /dev/spark/v2.3.3-rc2-bin/
Author: yamamuro Date: Mon Feb 4 20:57:11 2019 New Revision: 32334 Log: Apache Spark v2.3.3-rc2 Added: dev/spark/v2.3.3-rc2-bin/ dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz (with props) dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz.asc dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz.sha512 dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz (with props) dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz.asc dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz.sha512 dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-hadoop2.6.tgz (with props) dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-hadoop2.6.tgz.asc dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-hadoop2.6.tgz.sha512 dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-hadoop2.7.tgz (with props) dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-hadoop2.7.tgz.asc dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-hadoop2.7.tgz.sha512 dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-without-hadoop.tgz (with props) dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-without-hadoop.tgz.asc dev/spark/v2.3.3-rc2-bin/spark-2.3.3-bin-without-hadoop.tgz.sha512 dev/spark/v2.3.3-rc2-bin/spark-2.3.3.tgz (with props) dev/spark/v2.3.3-rc2-bin/spark-2.3.3.tgz.asc dev/spark/v2.3.3-rc2-bin/spark-2.3.3.tgz.sha512 Added: dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz.asc == --- dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz.asc (added) +++ dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz.asc Mon Feb 4 20:57:11 2019 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v1 + +iQIcBAABAgAGBQJcWF7vAAoJEG7F8QUt8I/0PpUP/iYfMl7Bdf+nIXEVvjZoCu7c +yJA3jvXf2JEju+hHZW88C596HFc8m+e+UDYM2uth3F7K84+0gM2zNCOVN4HQ748Q +wtL38cnNJHaKwKw8XaesQyeHa/bLf8Ze0zv5YOTMOlyZiTX59W84H/xS0AWsvv8P +OvgkYZhTmy3soWKk0A/5hUvdjyA+W52MpEwzXw/UQikvPMpjzIeGdEcZHWdEmelx +71AiEnlNj/e7byNH6IawqrRBBJDpqm8MNNM60+tP6tIGCEotICWfiVqO9iv/tDXJ +27igne+MI1XyVWyiDYT8FqlG+vf+HkLn16tKKtJW90nFaFCSTLavHpVsiRtozcu1 +T+Vl0FZjRJepS/pfd4eWhA6z6RgJZCNzJW/W3dr7Xriwc+TMsPaI5ttu9oT52U88 ++il8kP/wHDdk498UympcGbEpAwjEBUA5/OCUgBIUW8Zkd46nCwiMzodGO/vEbklj +dv4NfvRe50+ht0KJIlkbA4xYCNspek8d3jK9MWAi02MpE5Myr7iYnd45XrNHeSV1 +zDCXEJ0/D5vaNc4kFLGbxIUfCyR3f4FG3arfuuLSjTMfyNiafv12C0+Df1Z3HbhD +f0rxfdRY9M9JTMcIAGkmQwWnQ/nApdBzWOyXKMHQcXtyFd73q5xzJHvII70YNWmT +T+riYyEIAA0uim6BR0Ij +=E1BG +-END PGP SIGNATURE- Added: dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz.sha512 == --- dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz.sha512 (added) +++ dev/spark/v2.3.3-rc2-bin/SparkR_2.3.3.tar.gz.sha512 Mon Feb 4 20:57:11 2019 @@ -0,0 +1,3 @@ +SparkR_2.3.3.tar.gz: E6103CD0 38D2A402 CD16BD1F 70C28B23 67A84DAB 408E11AA + 3050A184 6C904BAD BC46DB3A C26A9FEB 8D131AB4 A13FC258 + 9369784B 9B434E6F 5B946FAD CE1B1C1E Added: dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz == Binary file - no diff available. 
Propchange: dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz -- svn:mime-type = application/octet-stream Added: dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz.asc == --- dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz.asc (added) +++ dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz.asc Mon Feb 4 20:57:11 2019 @@ -0,0 +1,17 @@ +-BEGIN PGP SIGNATURE- +Version: GnuPG v1 + +iQIcBAABAgAGBQJcWF70AAoJEG7F8QUt8I/0hlAQAJTRur8YVL8+gBuQA8bBnq1H +eCKLQ+twsKuZzohyBXT/fRMeck6uf+4CChnF3zpJFfGtzx7WfIg+pSJk1taes7L/ +fjnY7EwEk59e6Bb4ly4mAkPNUeNg01YHRD4+UqFqxH79a7S/ienqsBDie50gedds +or4YSwH1HE2h8DGQGLZZmy/b9kwaBhQmKlyBklxvAzKHmNPdgu4J+oTuojpEsN7X +4hz5skTPuqQs5ZUgpCQzc911q2fN67n2AipWmjSJEurGIeTqeKZrLWy4tl2USYsC +WXjFkKdMysJcPW0j+UILtLVcYT+KVRxRCUjbPw0wtXTY7zZq0q42zzw15VbUjiTj +GKPt84PCqt85tFuXlYr8HZuGbYNJceu8PYP66lYF+tifY7Sr8bTG1D6HKvLbt2Sr +fiLbv/mZt4CwcihfwVMjNnHuenUeCsHlq82frfOBrIVeaa0z1/1l4+Zv8L/N1ImI +JZaCO3f0oDky/F8gSTMUE0egBXIjzy6UHjheLVZriXJ2GFxrBOBqLce1PqiK0kiW +goYVM704P7dAH5BhNpCWozxwiyz+4jGmihkgOLKprpH6Gns1yOC8SGFjFFbSWQHR +kRDu/UxolfhpSyNjMtNcAxMnK9EDKT5HTbG4G9iG2U1CIkWEX/ZejViC2y3twC4r +wG2OzGq/sJfQJIZj/iAM +=+xeD +-END PGP SIGNATURE- Added: dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz.sha512 == --- dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz.sha512 (added) +++ dev/spark/v2.3.3-rc2-bin/pyspark-2.3.3.tar.gz.sha512
[spark] branch master updated: [SPARK-23264][SQL] Make INTERVAL keyword optional in INTERVAL clauses when ANSI mode enabled
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new bacffb8 [SPARK-23264][SQL] Make INTERVAL keyword optional in INTERVAL clauses when ANSI mode enabled bacffb8 is described below commit bacffb8810434b36fdb6fdd622a0f8c8d99ee5ab Author: Takeshi Yamamuro AuthorDate: Thu Mar 14 10:45:29 2019 +0900 [SPARK-23264][SQL] Make INTERVAL keyword optional in INTERVAL clauses when ANSI mode enabled ## What changes were proposed in this pull request? This pr updated parsing rules in `SqlBase.g4` to support a SQL query below when ANSI mode enabled; ``` SELECT CAST('2017-08-04' AS DATE) + 1 days; ``` The current master cannot parse it though, other dbms-like systems support the syntax (e.g., hive and mysql). Also, the syntax is frequently used in the official TPC-DS queries. This pr added new tokens as follows; ``` YEAR | YEARS | MONTH | MONTHS | WEEK | WEEKS | DAY | DAYS | HOUR | HOURS | MINUTE MINUTES | SECOND | SECONDS | MILLISECOND | MILLISECONDS | MICROSECOND | MICROSECONDS ``` Then, it registered the keywords below as the ANSI reserved (this follows SQL-2011); ``` DAY | HOUR | MINUTE | MONTH | SECOND | YEAR ``` ## How was this patch tested? Added tests in `SQLQuerySuite`, `ExpressionParserSuite`, and `TableIdentifierParserSuite`. Closes #20433 from maropu/SPARK-23264. 
Authored-by: Takeshi Yamamuro Signed-off-by: Takeshi Yamamuro --- docs/sql-reserved-and-non-reserved-keywords.md | 24 +- .../apache/spark/sql/catalyst/parser/SqlBase.g4| 74 +++- .../catalyst/parser/ExpressionParserSuite.scala| 91 +++-- .../parser/TableIdentifierParserSuite.scala| 34 +- .../resources/sql-tests/inputs/ansi/interval.sql | 188 + .../sql-tests/results/ansi/interval.sql.out| 439 + .../resources/sql-tests/results/literals.sql.out | 4 +- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 21 +- 8 files changed, 827 insertions(+), 48 deletions(-) diff --git a/docs/sql-reserved-and-non-reserved-keywords.md b/docs/sql-reserved-and-non-reserved-keywords.md index 53eb998..b1561fb 100644 --- a/docs/sql-reserved-and-non-reserved-keywords.md +++ b/docs/sql-reserved-and-non-reserved-keywords.md @@ -137,7 +137,8 @@ The list of reserved and non-reserved keywords can change according to the confi DATABASEnon-reservednon-reservednon-reserved DATABASESnon-reservednon-reservednon-reserved DATEnon-reservednon-reservedreserved - DAYnon-reservednon-reservedreserved + DAYreservednon-reservedreserved + DAYSnon-reservednon-reservednon-reserved DBPROPERTIESnon-reservednon-reservednon-reserved DEALLOCATEnon-reservednon-reservedreserved DECnon-reservednon-reservedreserved @@ -230,7 +231,8 @@ The list of reserved and non-reserved keywords can change according to the confi HANDLERnon-reservednon-reservedreserved HAVINGreservednon-reservedreserved HOLDnon-reservednon-reservedreserved - HOURnon-reservednon-reservedreserved + HOURreservednon-reservedreserved + HOURSnon-reservednon-reservednon-reserved IDENTITYnon-reservednon-reservedreserved IFnon-reservednon-reservedreserved IGNOREnon-reservednon-reservednon-reserved @@ -313,13 +315,19 @@ The list of reserved and non-reserved keywords can change according to the confi MEMBERnon-reservednon-reservedreserved MERGEnon-reservednon-reservedreserved METHODnon-reservednon-reservedreserved + MICROSECONDnon-reservednon-reservednon-reserved + 
MICROSECONDSnon-reservednon-reservednon-reserved + MILLISECONDnon-reservednon-reservednon-reserved + MILLISECONDSnon-reservednon-reservednon-reserved MINnon-reservednon-reservedreserved MINUSreservedreservednon-reserved - MINUTEnon-reservednon-reservedreserved + MINUTEreservednon-reservedreserved + MINUTESnon-reservednon-reservednon-reserved MODnon-reservednon-reservedreserved MODIFIESnon-reservednon-reservedreserved MODULEnon-reservednon-reservedreserved - MONTHnon-reservednon-reservedreserved + MONTHreservednon-reservedreserved + MONTHSnon-reservednon-reservednon-reserved MSCKnon-reservednon-reservednon-reserved MULTISETnon-reservednon-reservedreserved NAMESnon-reservednon-reservednon-reserved @@ -448,7 +456,8 @@ The list of reserved and non-reserved keywords can change according to the confi SCOPEnon-reservednon-reservedreserved SCROLLnon-reservednon-reservedreserved SEARCHnon-reservednon-reservedreserved - SECONDnon-reservednon-reservedreserved + SECONDreservednon-reservedreserved + SECONDSnon-reservednon-reservednon-reserved SECTIONnon-reservednon-reservednon-reserved SEEKnon-reservednon-reservednon-reserved SELECTreservednon
[spark] branch master updated (ef14237 -> c48e381)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from ef14237 [SPARK-28736][SPARK-28735][PYTHON][ML] Fix PySpark ML tests to pass in JDK 11 add c48e381 [SPARK-28671][SQL] Throw NoSuchPermanentFunctionException for a non-exsistent permanent function in dropFunction/alterFunction No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala | 4 ++-- .../org/apache/spark/sql/catalyst/catalog/SessionCatalogSuite.scala | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-28224][SQL] Check overflow in decimal Sum aggregate
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new b79cf0d [SPARK-28224][SQL] Check overflow in decimal Sum aggregate b79cf0d is described below commit b79cf0d14351c741efe4f27523919a0e24b8b2ed Author: Mick Jermsurawong AuthorDate: Tue Aug 20 09:47:04 2019 +0900 [SPARK-28224][SQL] Check overflow in decimal Sum aggregate ## What changes were proposed in this pull request? - Currently `sum` in aggregates for decimal type can overflow and return null. - `Sum` expression codegens arithmetic on `sql.Decimal` and the output which preserves scale and precision goes into `UnsafeRowWriter`. Here overflowing will be converted to null when writing out. - It also does not go through this branch in `DecimalAggregates` because it's expecting precision of the sum (not the elements to be summed) to be less than 5. https://github.com/apache/spark/blob/4ebff5b6d68f26cc1ff9265a5489e0d7c2e05449/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala#L1400-L1403 - This PR adds the check at the final result of the sum operator itself. https://github.com/apache/spark/blob/4ebff5b6d68f26cc1ff9265a5489e0d7c2e05449/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/interfaces.scala#L372-L376 https://issues.apache.org/jira/browse/SPARK-28224 ## How was this patch tested? - Added an integration test on dataframe suite cc mgaido91 JoshRosen Closes #25033 from mickjermsurawong-stripe/SPARK-28224. 
Authored-by: Mick Jermsurawong Signed-off-by: Takeshi Yamamuro --- .../sql/catalyst/expressions/aggregate/Sum.scala | 7 ++- .../org/apache/spark/sql/DataFrameSuite.scala | 23 +- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala index ef204ec..d04fe92 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.analysis.TypeCheckResult import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.TypeUtils +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types._ @ExpressionDescription( @@ -89,5 +90,9 @@ case class Sum(child: Expression) extends DeclarativeAggregate with ImplicitCast ) } - override lazy val evaluateExpression: Expression = sum + override lazy val evaluateExpression: Expression = resultType match { +case d: DecimalType => CheckOverflow(sum, d, SQLConf.get.decimalOperationsNullOnOverflow) +case _ => sum + } + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index ba8fced..c6daff1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -38,7 +38,7 @@ import org.apache.spark.sql.execution.exchange.{BroadcastExchangeExec, ReusedExc import org.apache.spark.sql.functions._ import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.{ExamplePoint, ExamplePointUDT, SharedSparkSession} -import org.apache.spark.sql.test.SQLTestData.{NullStrings, TestData2} 
+import org.apache.spark.sql.test.SQLTestData.{DecimalData, NullStrings, TestData2} import org.apache.spark.sql.types._ import org.apache.spark.util.Utils import org.apache.spark.util.random.XORShiftRandom @@ -156,6 +156,27 @@ class DataFrameSuite extends QueryTest with SharedSparkSession { structDf.select(xxhash64($"a", $"record.*"))) } + test("SPARK-28224: Aggregate sum big decimal overflow") { +val largeDecimals = spark.sparkContext.parallelize( + DecimalData(BigDecimal("1"* 20 + ".123"), BigDecimal("1"* 20 + ".123")) :: +DecimalData(BigDecimal("9"* 20 + ".123"), BigDecimal("9"* 20 + ".123")) :: Nil).toDF() + +Seq(true, false).foreach { nullOnOverflow => + withSQLConf((SQLConf.DECIMAL_OPERATIONS_NULL_ON_OVERFLOW.key, nullOnOverflow.toString)) { +val structDf = largeDecimals.select("a").agg(sum("a")) +if (nullOnOverflow) { + checkAnswer(structDf, Row(null)) +} else { + val e = intercept[SparkException]
[spark] branch master updated (a50959a -> 0bfcf9c)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from a50959a [SPARK-27937][CORE] Revert partial logic for auto namespace discovery add 0bfcf9c [SPARK-28322][SQL] Add support to Decimal type for integral divide No new revisions were added by this update. Summary of changes: .../sql/catalyst/analysis/DecimalPrecision.scala | 17 ++ .../sql/catalyst/expressions/arithmetic.scala | 51 +++- .../scala/org/apache/spark/sql/types/Decimal.scala | 6 +- .../expressions/ArithmeticExpressionSuite.scala| 31 +++ .../resources/sql-tests/inputs/operator-div.sql| 9 +- .../resources/sql-tests/inputs/pgSQL/numeric.sql | 10 +- .../sql-tests/results/operator-div.sql.out | 90 ++- .../sql-tests/results/pgSQL/numeric.sql.out| 274 - 8 files changed, 334 insertions(+), 154 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (b2f0660 -> ff5fa58)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from b2f0660 [SPARK-29002][SQL] Avoid changing SMJ to BHJ if the build side has a high ratio of empty partitions add ff5fa58 [SPARK-21870][SQL][FOLLOW-UP] Clean up string template formats for generated code in HashAggregateExec No new revisions were added by this update. Summary of changes: .../execution/aggregate/HashAggregateExec.scala| 102 ++--- 1 file changed, 49 insertions(+), 53 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 8e9fafb [SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark 8e9fafb is described below commit 8e9fafbb21a26028710df1831fc16b41855f0b4a Author: Maxim Gekk AuthorDate: Thu Sep 12 21:32:35 2019 +0900 [SPARK-29065][SQL][TEST] Extend `EXTRACT` benchmark ### What changes were proposed in this pull request? In the PR, I propose to extend `ExtractBenchmark` and add new ones for: - `EXTRACT` and `DATE` as input column - the `DATE_PART` function and `DATE`/`TIMESTAMP` input column ### Why are the changes needed? The `EXTRACT` expression is rebased on the `DATE_PART` expression by the PR https://github.com/apache/spark/pull/25410 where some of sub-expressions take `DATE` column as the input (`Millennium`, `Year` and etc.) but others require `TIMESTAMP` column (`Hour`, `Minute`). Separate benchmarks for `DATE` should exclude overhead of implicit conversions `DATE` <-> `TIMESTAMP`. ### Does this PR introduce any user-facing change? No, it doesn't. ### How was this patch tested? - Regenerated results of `ExtractBenchmark` Closes #25772 from MaxGekk/date_part-benchmark. 
Authored-by: Maxim Gekk Signed-off-by: Takeshi Yamamuro --- sql/core/benchmarks/ExtractBenchmark-results.txt | 243 + .../sql/execution/benchmark/ExtractBenchmark.scala | 95 +--- 2 files changed, 161 insertions(+), 177 deletions(-) diff --git a/sql/core/benchmarks/ExtractBenchmark-results.txt b/sql/core/benchmarks/ExtractBenchmark-results.txt index 7ee185e..31ad787 100644 --- a/sql/core/benchmarks/ExtractBenchmark-results.txt +++ b/sql/core/benchmarks/ExtractBenchmark-results.txt @@ -1,145 +1,100 @@ - -Extract - - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -cast to timestamp:Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative - -cast to timestamp wholestage off407432 36 24.6 40.7 1.0X -cast to timestamp wholestage on 348396 80 28.7 34.8 1.2X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -MILLENNIUM of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative - -MILLENNIUM of timestamp wholestage off 1407 1408 2 7.1 140.7 1.0X -MILLENNIUM of timestamp wholestage on 1334 1380 81 7.5 133.4 1.1X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -CENTURY of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative - -CENTURY of timestamp wholestage off1362 1364 3 7.3 136.2 1.0X -CENTURY of timestamp wholestage on 1334 1342 8 7.5 133.4 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -DECADE of timestamp: Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative - -DECADE of timestamp wholestage off 1226 1229 4 8.2 122.6 1.0X -DECADE of timestamp wholestage on 1218 1225 8 8.2 121.8 1.0X - -OpenJDK 64-Bit Server VM 1.8.0_222-b10 on Linux 3.10.0-862.3.2.el7.x86_64 -Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz -YEAR of 
timestamp:Best Time(ms) Avg Time(ms) Stdev(ms)Rate(M/s) Per Row(ns) Relative -
[spark] branch master updated (962e330 -> ca6f693)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 962e330 [SPARK-26598][SQL] Fix HiveThriftServer2 cannot be modified hiveconf/hivevar variables add ca6f693 [SPARK-28939][SQL][FOLLOWUP] Avoid useless Properties No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/sql/execution/SQLExecutionRDD.scala| 8 +--- 1 file changed, 1 insertion(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (88c8d5e -> 95073fb)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 88c8d5e [SPARK-23539][SS][FOLLOWUP][TESTS] Add UT to ensure existing query doesn't break with default conf of includeHeaders add 95073fb [SPARK-29008][SQL] Define an individual method for each common subexpression in HashAggregateExec No new revisions were added by this update. Summary of changes: .../expressions/codegen/CodeGenerator.scala| 78 +++--- .../execution/aggregate/HashAggregateExec.scala| 10 +-- .../sql/execution/WholeStageCodegenSuite.scala | 25 ++- 3 files changed, 93 insertions(+), 20 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-28412][SQL] ANSI SQL: OVERLAY function support byte array
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new aafce7e [SPARK-28412][SQL] ANSI SQL: OVERLAY function support byte array aafce7e is described below commit aafce7ebffe1acd8f6022f208beaa9ec6c9f7592 Author: gengjiaan AuthorDate: Tue Sep 10 08:16:18 2019 +0900 [SPARK-28412][SQL] ANSI SQL: OVERLAY function support byte array ## What changes were proposed in this pull request? This is a ANSI SQL and feature id is `T312` ``` ::= OVERLAY PLACING FROM [ FOR ] ``` This PR related to https://github.com/apache/spark/pull/24918 and support treat byte array. ref: https://www.postgresql.org/docs/11/functions-binarystring.html ## How was this patch tested? new UT. There are some show of the PR on my production environment. ``` spark-sql> select overlay(encode('Spark SQL', 'utf-8') PLACING encode('_', 'utf-8') FROM 6); Spark_SQL Time taken: 0.285 s spark-sql> select overlay(encode('Spark SQL', 'utf-8') PLACING encode('CORE', 'utf-8') FROM 7); Spark CORE Time taken: 0.202 s spark-sql> select overlay(encode('Spark SQL', 'utf-8') PLACING encode('ANSI ', 'utf-8') FROM 7 FOR 0); Spark ANSI SQL Time taken: 0.165 s spark-sql> select overlay(encode('Spark SQL', 'utf-8') PLACING encode('tructured', 'utf-8') FROM 2 FOR 4); Structured SQL Time taken: 0.141 s ``` Closes #25172 from beliefer/ansi-overlay-byte-array. 
Lead-authored-by: gengjiaan Co-authored-by: Jiaan Geng Signed-off-by: Takeshi Yamamuro --- .../catalyst/expressions/stringExpressions.scala | 60 +++--- .../expressions/StringExpressionsSuite.scala | 72 +- .../scala/org/apache/spark/sql/functions.scala | 16 ++--- .../apache/spark/sql/StringFunctionsSuite.scala| 33 +++--- 4 files changed, 157 insertions(+), 24 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index d7a5fb2..e4847e9 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -472,6 +472,19 @@ object Overlay { builder.append(input.substringSQL(pos + length, Int.MaxValue)) builder.build() } + + def calculate(input: Array[Byte], replace: Array[Byte], pos: Int, len: Int): Array[Byte] = { +// If you specify length, it must be a positive whole number or zero. +// Otherwise it will be ignored. +// The default value for length is the length of replace. 
+val length = if (len >= 0) { + len +} else { + replace.length +} +ByteArray.concat(ByteArray.subStringSQL(input, 1, pos - 1), + replace, ByteArray.subStringSQL(input, pos + length, Int.MaxValue)) + } } // scalastyle:off line.size.limit @@ -487,6 +500,14 @@ object Overlay { Spark ANSI SQL > SELECT _FUNC_('Spark SQL' PLACING 'tructured' FROM 2 FOR 4); Structured SQL + > SELECT _FUNC_(encode('Spark SQL', 'utf-8') PLACING encode('_', 'utf-8') FROM 6); + Spark_SQL + > SELECT _FUNC_(encode('Spark SQL', 'utf-8') PLACING encode('CORE', 'utf-8') FROM 7); + Spark CORE + > SELECT _FUNC_(encode('Spark SQL', 'utf-8') PLACING encode('ANSI ', 'utf-8') FROM 7 FOR 0); + Spark ANSI SQL + > SELECT _FUNC_(encode('Spark SQL', 'utf-8') PLACING encode('tructured', 'utf-8') FROM 2 FOR 4); + Structured SQL """) // scalastyle:on line.size.limit case class Overlay(input: Expression, replace: Expression, pos: Expression, len: Expression) @@ -496,19 +517,42 @@ case class Overlay(input: Expression, replace: Expression, pos: Expression, len: this(str, replace, pos, Literal.create(-1, IntegerType)) } - override def dataType: DataType = StringType + override def dataType: DataType = input.dataType - override def inputTypes: Seq[AbstractDataType] = -Seq(StringType, StringType, IntegerType, IntegerType) + override def inputTypes: Seq[AbstractDataType] = Seq(TypeCollection(StringType, BinaryType), +TypeCollection(StringType, BinaryType), IntegerType, IntegerType) override def children: Seq[Expression] = input :: replace :: pos :: len :: Nil + override def checkInputDataTypes(): TypeCheckResult = { +val inputTypeCheck = super.checkInputDataTypes() +if (inputTypeCheck.isSuccess) { + TypeUtils.checkForSameTypeInputExpr( +input.dataType :: replace.dataType :: Nil, s"function $prettyName") +} else {
[spark] branch master updated (905b7f7 -> 67b4329)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 905b7f7 [SPARK-28967][CORE] Include cloned version of "properties" to avoid ConcurrentModificationException add 67b4329 [SPARK-28690][SQL] Add `date_part` function for timestamps/dates No new revisions were added by this update. Summary of changes: .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../catalyst/expressions/datetimeExpressions.scala | 87 + .../spark/sql/catalyst/parser/AstBuilder.scala | 48 +-- .../test/resources/sql-tests/inputs/date_part.sql | 68 .../resources/sql-tests/inputs/pgSQL/timestamp.sql | 31 +- .../resources/sql-tests/results/date_part.sql.out | 412 + .../resources/sql-tests/results/extract.sql.out| 126 +++ .../resources/sql-tests/results/pgSQL/date.sql.out | 52 +-- .../sql-tests/results/pgSQL/timestamp.sql.out | 55 ++- 9 files changed, 727 insertions(+), 153 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/date_part.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/date_part.sql.out - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-28071][SQL][TEST] Port strings.sql
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 2656c9d [SPARK-28071][SQL][TEST] Port strings.sql 2656c9d is described below commit 2656c9d304b59584c331b923e8536e4093d83f81 Author: Yuming Wang AuthorDate: Tue Jul 30 18:54:14 2019 +0900 [SPARK-28071][SQL][TEST] Port strings.sql ## What changes were proposed in this pull request? This PR is to port strings.sql from PostgreSQL regression tests. https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/strings.sql The expected results can be found in the link: https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/expected/strings.out When porting the test cases, found nine PostgreSQL specific features that do not exist in Spark SQL: [SPARK-28076](https://issues.apache.org/jira/browse/SPARK-28076): Support regular expression substring [SPARK-28078](https://issues.apache.org/jira/browse/SPARK-28078): Add support other 4 REGEXP functions [SPARK-28412](https://issues.apache.org/jira/browse/SPARK-28412): OVERLAY function support byte array [SPARK-28083](https://issues.apache.org/jira/browse/SPARK-28083): ANSI SQL: LIKE predicate: ESCAPE clause [SPARK-28087](https://issues.apache.org/jira/browse/SPARK-28087): Add support split_part [SPARK-28122](https://issues.apache.org/jira/browse/SPARK-28122): Missing `sha224`/`sha256 `/`sha384 `/`sha512 ` functions [SPARK-28123](https://issues.apache.org/jira/browse/SPARK-28123): Add support string functions: btrim [SPARK-28448](https://issues.apache.org/jira/browse/SPARK-28448): Implement ILIKE operator [SPARK-28449](https://issues.apache.org/jira/browse/SPARK-28449): Missing escape_string_warning and standard_conforming_strings config Also, found five inconsistent behavior: [SPARK-27952](https://issues.apache.org/jira/browse/SPARK-27952): String Functions: 
regexp_replace is not compatible [SPARK-28121](https://issues.apache.org/jira/browse/SPARK-28121): decode can not accept 'escape' as charset [SPARK-27930](https://issues.apache.org/jira/browse/SPARK-27930): Replace `strpos` with `locate` or `position` in Spark SQL [SPARK-27930](https://issues.apache.org/jira/browse/SPARK-27930): Replace `to_hex` with `hex ` or in Spark SQL [SPARK-28451](https://issues.apache.org/jira/browse/SPARK-28451): `substr` returns different values ## How was this patch tested? N/A Closes #24923 from wangyum/SPARK-28071. Authored-by: Yuming Wang Signed-off-by: Takeshi Yamamuro --- .../resources/sql-tests/inputs/pgSQL/strings.sql | 660 +++ .../sql-tests/results/pgSQL/strings.sql.out| 718 + 2 files changed, 1378 insertions(+) diff --git a/sql/core/src/test/resources/sql-tests/inputs/pgSQL/strings.sql b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/strings.sql new file mode 100644 index 000..a684428 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/pgSQL/strings.sql @@ -0,0 +1,660 @@ +-- +-- Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group +-- +-- STRINGS +-- -- https://github.com/postgres/postgres/blob/REL_12_BETA2/src/test/regress/sql/strings.sql +-- Test various data entry syntaxes. +-- + +-- SQL string continuation syntax +-- E021-03 character string literals +SELECT 'first line' +' - next line' + ' - third line' + AS `Three lines to one`; + +-- Spark SQL support this string continuation syntax +-- illegal string continuation syntax +SELECT 'first line' +' - next line' /* this comment is not allowed here */ +' - third line' + AS `Illegal comment within continuation`; + +-- [SPARK-28447] ANSI SQL: Unicode escapes in literals +-- Unicode escapes +-- SET standard_conforming_strings TO on; + +-- SELECT U&'d\0061t\+61' AS U&"d\0061t\+61"; +-- SELECT U&'d!0061t\+61' UESCAPE '!' AS U&"d*0061t\+61" UESCAPE '*'; + +-- SELECT U&' \' UESCAPE '!' 
AS "tricky"; +-- SELECT 'tricky' AS U&"\" UESCAPE '!'; + +-- SELECT U&'wrong: \061'; +-- SELECT U&'wrong: \+0061'; +-- SELECT U&'wrong: +0061' UESCAPE '+'; + +-- SET standard_conforming_strings TO off; + +-- SELECT U&'d\0061t\+61' AS U&"d\0061t\+61"; +-- SELECT U&'d!0061t\+61' UESCAPE '!' AS U&"d*0061t\+61" UESCAPE '*'; + +-- SELECT U&' \' UESCAPE '!' AS "tricky"; +-- SELECT 'tricky' AS U&"\" UESCAPE '!'; + +-- SELECT U&'wrong: \061'; +-- SELECT U&'wrong: \+0061'; +-- SELECT U&'wrong: +0061' UESCAPE '+'; + +-- RESET standard_conforming_strings; + +-- Spark SQL only support escape mode +-- bytea +-- SET bytea_output TO hex;
[spark] branch master updated (325bc8e -> 8617bf6)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 325bc8e [SPARK-28583][SQL] Subqueries should not call `onUpdatePlan` in Adaptive Query Execution add 8617bf6 [SPARK-28470][SQL] Cast to decimal throws ArithmeticException on overflow No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/expressions/Cast.scala | 36 ++ .../spark/sql/catalyst/expressions/CastSuite.scala | 22 + 2 files changed, 52 insertions(+), 6 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated: [SPARK-28520][SQL] WholeStageCodegen does not work properly for LocalTableScanExec
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git The following commit(s) were added to refs/heads/master by this push: new 6bc5c6a [SPARK-28520][SQL] WholeStageCodegen does not work properly for LocalTableScanExec 6bc5c6a is described below commit 6bc5c6a4e7c36361db437313cd950509a1ab6db2 Author: Kousuke Saruta AuthorDate: Mon Jul 29 08:35:25 2019 +0900 [SPARK-28520][SQL] WholeStageCodegen does not work properly for LocalTableScanExec Code is not generated for LocalTableScanExec even in proper situations. If a LocalTableScanExec plan has the direct parent plan which supports WholeStageCodegen, the LocalTableScanExec plan also should be within a WholeStageCodegen domain. But code is not generated for LocalTableScanExec and InputAdapter is inserted for now. ``` val df1 = spark.createDataset(1 to 10).toDF val df2 = spark.createDataset(1 to 10).toDF val df3 = df1.join(df2, df1("value") === df2("value")) df3.explain(true) ... == Physical Plan == *(1) BroadcastHashJoin [value#1], [value#6], Inner, BuildRight :- LocalTableScan [value#1] // LocalTableScanExec is not within a WholeStageCodegen domain +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint))) +- LocalTableScan [value#6] ``` ``` scala> df3.queryExecution.executedPlan.children.head.children.head.getClass res4: Class[_ <: org.apache.spark.sql.execution.SparkPlan] = class org.apache.spark.sql.execution.InputAdapter ``` For the current implementation of LocalTableScanExec, codegen is enabled in case `parent` is not null but `parent` is set in `consume`, which is called after `insertInputAdapter` so it doesn't work as intended. After applying this change, we can get the following plan, which means LocalTableScanExec is within a WholeStageCodegen domain.
``` == Physical Plan == *(1) BroadcastHashJoin [value#63], [value#68], Inner, BuildRight :- *(1) LocalTableScan [value#63] +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint))) +- LocalTableScan [value#68] ## How was this patch tested? New test cases are added into WholeStageCodegenSuite. Closes #25260 from sarutak/localtablescan-improvement. Authored-by: Kousuke Saruta Signed-off-by: Takeshi Yamamuro --- .../spark/sql/execution/LocalTableScanExec.scala | 3 --- .../sql/execution/WholeStageCodegenExec.scala | 4 +++ .../sql/execution/WholeStageCodegenSuite.scala | 30 ++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala index 31640db..9e32ecf 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala @@ -80,8 +80,5 @@ case class LocalTableScanExec( // Input is already UnsafeRows. override protected val createUnsafeProjection: Boolean = false - // Do not codegen when there is no parent - to support the fast driver-local collect/take paths. - override def supportCodegen: Boolean = (parent != null) - override def inputRDD: RDD[InternalRow] = rdd } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala index a0afa9a..d9d9b1f 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/WholeStageCodegenExec.scala @@ -895,6 +895,10 @@ case class CollapseCodegenStages( // domain object can not be written into unsafe row. 
case plan if plan.output.length == 1 && plan.output.head.dataType.isInstanceOf[ObjectType] => plan.withNewChildren(plan.children.map(insertWholeStageCodegen(_, isColumnar))) + case plan: LocalTableScanExec => +// Do not make LogicalTableScanExec the root of WholeStageCodegen +// to support the fast driver-local collect/take paths. +plan case plan: CodegenSupport if supportCodegen(plan) => WholeStageCodegenExec( insertInputAdapter(plan, isColumnar))(codegenStageCounter.incrementAndGet()) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index 483a046..59b9e15 100644 --- a/sql/core/src/test/scala/org/apache/spark
[spark] branch master updated (94499af -> 794804e)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 94499af [SPARK-28486][CORE][PYTHON] Map PythonBroadcast's data file to a BroadcastBlock to avoid delete by GC add 794804e [SPARK-28537][SQL] DebugExec cannot debug broadcast or columnar related queries No new revisions were added by this update. Summary of changes: .../apache/spark/sql/execution/debug/package.scala | 10 + .../spark/sql/execution/debug/DebuggingSuite.scala | 43 ++ 2 files changed, 53 insertions(+) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (f21bc18 -> 5159876)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from f21bc18 [SPARK-27889][INFRA] Make development scripts under dev/ support Python 3 add 5159876 [SPARK-28077][SQL][TEST][FOLLOW-UP] Enable Overlay function tests No new revisions were added by this update. Summary of changes: .../resources/sql-tests/inputs/pgSQL/strings.sql | 8 +- .../sql-tests/results/pgSQL/strings.sql.out| 422 +++-- 2 files changed, 231 insertions(+), 199 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (d51d228 -> 04536b2)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from d51d228 [SPARK-29397][CORE] Extend plugin interface to include the driver add 04536b2 [SPARK-28552][SQL] Case-insensitive database URLs in JdbcDialect No new revisions were added by this update. Summary of changes: .../scala/org/apache/spark/sql/jdbc/DB2Dialect.scala | 4 +++- .../org/apache/spark/sql/jdbc/DerbyDialect.scala | 4 +++- .../apache/spark/sql/jdbc/MsSqlServerDialect.scala| 5 - .../org/apache/spark/sql/jdbc/MySQLDialect.scala | 4 +++- .../org/apache/spark/sql/jdbc/OracleDialect.scala | 5 +++-- .../org/apache/spark/sql/jdbc/PostgresDialect.scala | 4 +++- .../org/apache/spark/sql/jdbc/TeradataDialect.scala | 5 +++-- .../scala/org/apache/spark/sql/jdbc/JDBCSuite.scala | 19 ++- 8 files changed, 40 insertions(+), 10 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (888cc46 -> 8a8ac00)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 888cc46 [SPARK-29675][SQL] Add exception when isolationLevel is Illegal add 8a8ac00 [SPARK-29687][SQL] Fix JDBC metrics counter data type No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (888cc46 -> 8a8ac00)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 888cc46 [SPARK-29675][SQL] Add exception when isolationLevel is Illegal add 8a8ac00 [SPARK-29687][SQL] Fix JDBC metrics counter data type No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (dcea7a4 -> c2f29d5)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from dcea7a4 [SPARK-29883][SQL] Implement a helper method for aliasing bool_and() and bool_or() add c2f29d5 [SPARK-30138][SQL] Separate configuration key of max iterations for analyzer and optimizer No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 10 +- 2 files changed, 11 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (dcea7a4 -> c2f29d5)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from dcea7a4 [SPARK-29883][SQL] Implement a helper method for aliasing bool_and() and bool_or() add c2f29d5 [SPARK-30138][SQL] Separate configuration key of max iterations for analyzer and optimizer No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 4 ++-- .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 10 +- 2 files changed, 11 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (afc4fa0 -> e88d740)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from afc4fa0 [SPARK-30156][BUILD] Upgrade Jersey from 2.29 to 2.29.1 add e88d740 [SPARK-30147][SQL] Trim the string when cast string type to booleans No new revisions were added by this update. Summary of changes: docs/sql-migration-guide.md| 4 +-- .../spark/sql/catalyst/util/StringUtils.scala | 5 ++-- .../spark/sql/catalyst/expressions/CastSuite.scala | 2 ++ .../src/test/resources/sql-tests/inputs/cast.sql | 7 +- .../test/resources/sql-tests/results/cast.sql.out | 29 +- 5 files changed, 39 insertions(+), 8 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (1c714be -> bf7215c)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 1c714be [SPARK-25100][TEST][FOLLOWUP] Refactor test cases in `FileSuite` and `KryoSerializerSuite` add bf7215c [SPARK-30066][SQL][FOLLOWUP] Remove size field for interval column cache No new revisions were added by this update. Summary of changes: .../spark/sql/execution/columnar/ColumnType.scala | 19 +-- .../sql/execution/columnar/ColumnTypeSuite.scala | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (1c714be -> bf7215c)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 1c714be [SPARK-25100][TEST][FOLLOWUP] Refactor test cases in `FileSuite` and `KryoSerializerSuite` add bf7215c [SPARK-30066][SQL][FOLLOWUP] Remove size field for interval column cache No new revisions were added by this update. Summary of changes: .../spark/sql/execution/columnar/ColumnType.scala | 19 +-- .../sql/execution/columnar/ColumnTypeSuite.scala | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (1c714be -> bf7215c)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 1c714be [SPARK-25100][TEST][FOLLOWUP] Refactor test cases in `FileSuite` and `KryoSerializerSuite` add bf7215c [SPARK-30066][SQL][FOLLOWUP] Remove size field for interval column cache No new revisions were added by this update. Summary of changes: .../spark/sql/execution/columnar/ColumnType.scala | 19 +-- .../sql/execution/columnar/ColumnTypeSuite.scala | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (8bd8f49 -> 0cf4f07)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 8bd8f49 [SPARK-29500][SQL][SS] Support partition column when writing to Kafka add 0cf4f07 [SPARK-29545][SQL] Add support for bit_xor aggregate function No new revisions were added by this update. Summary of changes: .../sql/catalyst/analysis/FunctionRegistry.scala | 1 + .../expressions/aggregate/bitwiseAggregates.scala | 92 -- .../test/resources/sql-tests/inputs/bitwise.sql| 31 .../resources/sql-tests/results/bitwise.sql.out| 71 - 4 files changed, 151 insertions(+), 44 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (1296bbb -> 67cf043)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 1296bbb [SPARK-29504][WEBUI] Toggle full job description on click add 67cf043 [SPARK-29145][SQL] Support sub-queries in join conditions No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/analysis/Analyzer.scala | 2 + .../sql/catalyst/analysis/CheckAnalysis.scala | 4 +- .../catalyst/analysis/ResolveSubquerySuite.scala | 14 +- .../scala/org/apache/spark/sql/SubquerySuite.scala | 148 + 4 files changed, 165 insertions(+), 3 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (091cbc3 -> cfbdd9d)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 091cbc3 [SPARK-9612][ML] Add instance weight support for GBTs add cfbdd9d [SPARK-29461][SQL] Measure the number of records being updated for JDBC writer No new revisions were added by this update. Summary of changes: .../sql/execution/datasources/jdbc/JdbcUtils.scala | 23 +++-- .../org/apache/spark/sql/jdbc/JDBCWriteSuite.scala | 55 ++ 2 files changed, 73 insertions(+), 5 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (9c817a8 -> 6958d7e)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 9c817a8 [SPARK-29637][CORE] Add description to Job SHS web API add 6958d7e [SPARK-28746][SQL] Add partitionby hint for sql queries No new revisions were added by this update. Summary of changes: .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../spark/sql/catalyst/analysis/ResolveHints.scala | 108 + .../sql/catalyst/analysis/ResolveHintsSuite.scala | 81 ++-- .../sql/catalyst/parser/PlanParserSuite.scala | 46 + .../org/apache/spark/sql/DataFrameHintSuite.scala | 12 +++ 5 files changed, 220 insertions(+), 29 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (40b8a08 -> 9e58b10)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 40b8a08 [SPARK-29963][SQL][TESTS] Check formatting timestamps up to microsecond precision by JSON/CSV datasource add 9e58b10 [SPARK-29945][SQL] do not handle negative sign specially in the parser No new revisions were added by this update. Summary of changes: .../apache/spark/sql/catalyst/parser/SqlBase.g4| 4 +- .../spark/sql/catalyst/parser/AstBuilder.scala | 31 +++ .../catalyst/parser/ExpressionParserSuite.scala| 5 +- .../test/resources/sql-tests/inputs/literals.sql | 5 +- .../sql-tests/results/ansi/interval.sql.out| 12 ++-- .../sql-tests/results/ansi/literals.sql.out| 65 +++--- .../results/interval-display-iso_8601.sql.out | 2 +- .../results/interval-display-sql_standard.sql.out | 2 +- .../sql-tests/results/interval-display.sql.out | 2 +- .../resources/sql-tests/results/interval.sql.out | 12 ++-- .../resources/sql-tests/results/literals.sql.out | 65 +++--- .../sql-tests/results/postgreSQL/interval.sql.out | 2 +- 12 files changed, 98 insertions(+), 109 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (de21f28 -> bd9ce83)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from de21f28 [SPARK-29986][SQL] casting string to date/timestamp/interval should trim all whitespaces add bd9ce83 [SPARK-29975][SQL][FOLLOWUP] document --CONFIG_DIM No new revisions were added by this update. Summary of changes: .../org/apache/spark/sql/SQLQueryTestSuite.scala | 22 -- .../thriftserver/ThriftServerQueryTestSuite.scala | 8 +++- 2 files changed, 19 insertions(+), 11 deletions(-) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
[spark] branch master updated (14337f6 -> f53be0a)
This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a change to branch master in repository https://gitbox.apache.org/repos/asf/spark.git. from 14337f6 [SPARK-29643][SQL] ALTER TABLE/VIEW (DROP PARTITION) should look up catalog/table like v2 commands add f53be0a [SPARK-29109][SQL][TESTS] Port window.sql (Part 3) No new revisions were added by this update. Summary of changes: .../sql-tests/inputs/postgreSQL/window_part3.sql | 451 + .../results/postgreSQL/window_part3.sql.out| 409 +++ 2 files changed, 860 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/postgreSQL/window_part3.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/postgreSQL/window_part3.sql.out - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org