[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.4.0-rc5 [created] 0a4c03f7d
[2/2] spark git commit: Preparing development version 2.4.1-SNAPSHOT
Preparing development version 2.4.1-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22bec3c6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22bec3c6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22bec3c6

Branch: refs/heads/branch-2.4
Commit: 22bec3c6dab1147eee0342993aa8f64202603a8d
Parents: 0a4c03f
Author: Wenchen Fan
Authored: Mon Oct 29 06:15:33 2018 +
Committer: Wenchen Fan
Committed: Mon Oct 29 06:15:33 2018 +

----------------------------------------------------------------------
 R/pkg/DESCRIPTION | 2 +-
 assembly/pom.xml | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 docs/_config.yml | 4 ++--
 examples/pom.xml | 2 +-
 external/avro/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10-sql/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 python/pyspark/version.py | 2 +-
 repl/pom.xml | 2 +-
 resource-managers/kubernetes/core/pom.xml | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml | 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 43 files changed, 44 insertions(+), 44 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/22bec3c6/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index f52d785..714b6f1 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 2.4.0
+Version: 2.4.1
 Title: R Frontend for Apache Spark
 Description: Provides an R Frontend for Apache Spark.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),

http://git-wip-us.apache.org/repos/asf/spark/blob/22bec3c6/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 63ab510..ee0de73 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.11
-2.4.0
+2.4.1-SNAPSHOT
 ../pom.xml

http://git-wip-us.apache.org/repos/asf/spark/blob/22bec3c6/common/kvstore/pom.xml
----------------------------------------------------------------------
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index b10e118..b89e0fe 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.11
-2.4.0
+2.4.1-SNAPSHOT
 ../../pom.xml

http://git-wip-us.apache.org/repos/asf/spark/blob/22bec3c6/common/network-common/pom.xml
----------------------------------------------------------------------
diff --git a/common/network-common/pom.xml b/common/network
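These release-prep commits are mechanical: the same parent-version string is bumped in every module POM (plus a few version files). As a rough illustration only, a hedged Python sketch of the shape of the change; Spark's own release scripts drive this through Maven tooling, so the snippet below is a hypothetical stand-in, not the project's tooling:

```python
import pathlib
import re

OLD, NEW = "2.4.0", "2.4.1-SNAPSHOT"

# Rewrite the first <version> element in each module POM, which in Spark
# modules is the spark-parent version this kind of commit bumps.
for pom in pathlib.Path(".").rglob("pom.xml"):
    text = pom.read_text()
    pom.write_text(
        re.sub(re.escape("<version>%s</version>" % OLD),
               "<version>%s</version>" % NEW, text, count=1))
```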
[1/2] spark git commit: Preparing Spark release v2.4.0-rc5
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 7f4fce426 -> 22bec3c6d

Preparing Spark release v2.4.0-rc5

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0a4c03f7
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0a4c03f7
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0a4c03f7

Branch: refs/heads/branch-2.4
Commit: 0a4c03f7d084f1d2aa48673b99f3b9496893ce8d
Parents: 7f4fce4
Author: Wenchen Fan
Authored: Mon Oct 29 06:15:29 2018 +
Committer: Wenchen Fan
Committed: Mon Oct 29 06:15:29 2018 +

----------------------------------------------------------------------
 R/pkg/DESCRIPTION | 2 +-
 assembly/pom.xml | 2 +-
 common/kvstore/pom.xml | 2 +-
 common/network-common/pom.xml | 2 +-
 common/network-shuffle/pom.xml | 2 +-
 common/network-yarn/pom.xml | 2 +-
 common/sketch/pom.xml | 2 +-
 common/tags/pom.xml | 2 +-
 common/unsafe/pom.xml | 2 +-
 core/pom.xml | 2 +-
 docs/_config.yml | 4 ++--
 examples/pom.xml | 2 +-
 external/avro/pom.xml | 2 +-
 external/docker-integration-tests/pom.xml | 2 +-
 external/flume-assembly/pom.xml | 2 +-
 external/flume-sink/pom.xml | 2 +-
 external/flume/pom.xml | 2 +-
 external/kafka-0-10-assembly/pom.xml | 2 +-
 external/kafka-0-10-sql/pom.xml | 2 +-
 external/kafka-0-10/pom.xml | 2 +-
 external/kafka-0-8-assembly/pom.xml | 2 +-
 external/kafka-0-8/pom.xml | 2 +-
 external/kinesis-asl-assembly/pom.xml | 2 +-
 external/kinesis-asl/pom.xml | 2 +-
 external/spark-ganglia-lgpl/pom.xml | 2 +-
 graphx/pom.xml | 2 +-
 hadoop-cloud/pom.xml | 2 +-
 launcher/pom.xml | 2 +-
 mllib-local/pom.xml | 2 +-
 mllib/pom.xml | 2 +-
 pom.xml | 2 +-
 python/pyspark/version.py | 2 +-
 repl/pom.xml | 2 +-
 resource-managers/kubernetes/core/pom.xml | 2 +-
 resource-managers/kubernetes/integration-tests/pom.xml | 2 +-
 resource-managers/mesos/pom.xml | 2 +-
 resource-managers/yarn/pom.xml | 2 +-
 sql/catalyst/pom.xml | 2 +-
 sql/core/pom.xml | 2 +-
 sql/hive-thriftserver/pom.xml | 2 +-
 sql/hive/pom.xml | 2 +-
 streaming/pom.xml | 2 +-
 tools/pom.xml | 2 +-
 43 files changed, 44 insertions(+), 44 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/0a4c03f7/R/pkg/DESCRIPTION
----------------------------------------------------------------------
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 714b6f1..f52d785 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: SparkR
 Type: Package
-Version: 2.4.1
+Version: 2.4.0
 Title: R Frontend for Apache Spark
 Description: Provides an R Frontend for Apache Spark.
 Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),

http://git-wip-us.apache.org/repos/asf/spark/blob/0a4c03f7/assembly/pom.xml
----------------------------------------------------------------------
diff --git a/assembly/pom.xml b/assembly/pom.xml
index ee0de73..63ab510 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -21,7 +21,7 @@
 org.apache.spark
 spark-parent_2.11
-2.4.1-SNAPSHOT
+2.4.0
 ../pom.xml

http://git-wip-us.apache.org/repos/asf/spark/blob/0a4c03f7/common/kvstore/pom.xml
----------------------------------------------------------------------
diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml
index b89e0fe..b10e118 100644
--- a/common/kvstore/pom.xml
+++ b/common/kvstore/pom.xml
@@ -22,7 +22,7 @@
 org.apache.spark
 spark-parent_2.11
-2.4.1-SNAPSHOT
+2.4.0
 ../../pom.xml

http://git-wip-us.apache.org/repos/asf/spark/blob/0a4c03f7/common/network-common/pom.xml
---
spark git commit: [SPARK-25179][PYTHON][DOCS] Document BinaryType support in Arrow conversion
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 b6ba0dd47 -> 7f4fce426

[SPARK-25179][PYTHON][DOCS] Document BinaryType support in Arrow conversion

## What changes were proposed in this pull request?

This PR targets to document binary type in "Apache Arrow in Spark".

## How was this patch tested?

Manually built the documentation and checked.

Closes #22871 from HyukjinKwon/SPARK-25179.

Authored-by: hyukjinkwon
Signed-off-by: gatorsmile
(cherry picked from commit fbaf150507a289ec0ac02fdbf4009c42cd9bc164)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f4fce42
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f4fce42
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f4fce42

Branch: refs/heads/branch-2.4
Commit: 7f4fce426025d54f41d8e87928582563a8ad689e
Parents: b6ba0dd
Author: hyukjinkwon
Authored: Sun Oct 28 23:01:35 2018 -0700
Committer: gatorsmile
Committed: Sun Oct 28 23:02:09 2018 -0700

----------------------------------------------------------------------
 docs/sql-pyspark-pandas-with-arrow.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/7f4fce42/docs/sql-pyspark-pandas-with-arrow.md
----------------------------------------------------------------------
diff --git a/docs/sql-pyspark-pandas-with-arrow.md b/docs/sql-pyspark-pandas-with-arrow.md
index e8e9f55..d04b955 100644
--- a/docs/sql-pyspark-pandas-with-arrow.md
+++ b/docs/sql-pyspark-pandas-with-arrow.md
@@ -127,8 +127,9 @@ For detailed usage, please see [`pyspark.sql.functions.pandas_udf`](api/python/p

 ### Supported SQL Types

-Currently, all Spark SQL data types are supported by Arrow-based conversion except `BinaryType`, `MapType`,
-`ArrayType` of `TimestampType`, and nested `StructType`.
+Currently, all Spark SQL data types are supported by Arrow-based conversion except `MapType`,
+`ArrayType` of `TimestampType`, and nested `StructType`. `BinaryType` is supported only when
+installed PyArrow is equal to or higher than 0.10.0.

 ### Setting Arrow Batch Size
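The documented behavior is easy to exercise. A minimal hedged sketch, assuming Spark 2.4 with PyArrow 0.10.0 or newer installed; the column names and rows are illustrative:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Enable Arrow-based conversion between Spark and pandas DataFrames.
spark.conf.set("spark.sql.execution.arrow.enabled", "true")

# A DataFrame with a BinaryType column. With PyArrow 0.10.0+ the toPandas()
# call below goes through Arrow; with older PyArrow the Arrow path does not
# support BinaryType and Spark falls back to the non-Arrow conversion.
df = spark.createDataFrame(
    [(1, bytearray(b"\x00\x01")), (2, bytearray(b"\x02"))],
    ["id", "payload"])
pdf = df.toPandas()
```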
spark git commit: [SPARK-25179][PYTHON][DOCS] Document BinaryType support in Arrow conversion
Repository: spark
Updated Branches:
  refs/heads/master 4e990d9dd -> fbaf15050

[SPARK-25179][PYTHON][DOCS] Document BinaryType support in Arrow conversion

## What changes were proposed in this pull request?

This PR targets to document binary type in "Apache Arrow in Spark".

## How was this patch tested?

Manually built the documentation and checked.

Closes #22871 from HyukjinKwon/SPARK-25179.

Authored-by: hyukjinkwon
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fbaf1505
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fbaf1505
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fbaf1505

Branch: refs/heads/master
Commit: fbaf150507a289ec0ac02fdbf4009c42cd9bc164
Parents: 4e990d9
Author: hyukjinkwon
Authored: Sun Oct 28 23:01:35 2018 -0700
Committer: gatorsmile
Committed: Sun Oct 28 23:01:35 2018 -0700

----------------------------------------------------------------------
 docs/sql-pyspark-pandas-with-arrow.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/fbaf1505/docs/sql-pyspark-pandas-with-arrow.md
----------------------------------------------------------------------
diff --git a/docs/sql-pyspark-pandas-with-arrow.md b/docs/sql-pyspark-pandas-with-arrow.md
index e8e9f55..d04b955 100644
--- a/docs/sql-pyspark-pandas-with-arrow.md
+++ b/docs/sql-pyspark-pandas-with-arrow.md
@@ -127,8 +127,9 @@ For detailed usage, please see [`pyspark.sql.functions.pandas_udf`](api/python/p

 ### Supported SQL Types

-Currently, all Spark SQL data types are supported by Arrow-based conversion except `BinaryType`, `MapType`,
-`ArrayType` of `TimestampType`, and nested `StructType`.
+Currently, all Spark SQL data types are supported by Arrow-based conversion except `MapType`,
+`ArrayType` of `TimestampType`, and nested `StructType`. `BinaryType` is supported only when
+installed PyArrow is equal to or higher than 0.10.0.

 ### Setting Arrow Batch Size
spark git commit: [DOC] Fix doc for spark.sql.parquet.recordLevelFilter.enabled
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 3e0160bac -> 632c0d911

[DOC] Fix doc for spark.sql.parquet.recordLevelFilter.enabled

## What changes were proposed in this pull request?

Updated the doc string value for spark.sql.parquet.recordLevelFilter.enabled to indicate that spark.sql.parquet.enableVectorizedReader must be disabled.

The code in ParquetFileFormat uses spark.sql.parquet.recordLevelFilter.enabled only after falling back to parquet-mr (see the else branch of this if statement):
https://github.com/apache/spark/blob/d5573c578a1eea9ee04886d9df37c7178e67bb30/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala#L412
https://github.com/apache/spark/blob/d5573c578a1eea9ee04886d9df37c7178e67bb30/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala#L427-L430

Tests also bear this out.

## How was this patch tested?

This is just a doc string fix: I built Spark and ran a single test.

Closes #22865 from bersprockets/confdocfix.

Authored-by: Bruce Robbins
Signed-off-by: Wenchen Fan
(cherry picked from commit 4e990d9dd2407dc257712c4b12b507f0990ca4e9)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/632c0d91
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/632c0d91
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/632c0d91

Branch: refs/heads/branch-2.3
Commit: 632c0d911c1bbdc715fe476ea49db9bfd387517f
Parents: 3e0160b
Author: Bruce Robbins
Authored: Mon Oct 29 13:44:58 2018 +0800
Committer: Wenchen Fan
Committed: Mon Oct 29 13:46:09 2018 +0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/632c0d91/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 0bcc5d0..f35c5c6 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -362,7 +362,8 @@ object SQLConf {
   val PARQUET_RECORD_FILTER_ENABLED = buildConf("spark.sql.parquet.recordLevelFilter.enabled")
     .doc("If true, enables Parquet's native record-level filtering using the pushed down " +
       "filters. This configuration only has an effect when 'spark.sql.parquet.filterPushdown' " +
-      "is enabled.")
+      "is enabled and the vectorized reader is not used. You can ensure the vectorized reader " +
+      "is not used by setting 'spark.sql.parquet.enableVectorizedReader' to false.")
     .booleanConf
     .createWithDefault(false)
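As a hedged sketch of how the three configs in this fix interact: the config keys are the real ones quoted in the diff, while the path and filter below are illustrative:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

# Record-level filtering only applies on the parquet-mr read path, so the
# vectorized reader must be disabled and filter pushdown must be enabled.
spark.conf.set("spark.sql.parquet.enableVectorizedReader", "false")
spark.conf.set("spark.sql.parquet.filterPushdown", "true")
spark.conf.set("spark.sql.parquet.recordLevelFilter.enabled", "true")

# Illustrative read: pushed-down filters are now also evaluated per record
# by Parquet's native record-level filtering.
df = spark.read.parquet("/tmp/events").where("status = 'ok'")
```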
spark git commit: [DOC] Fix doc for spark.sql.parquet.recordLevelFilter.enabled
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 00771dced -> b6ba0dd47

[DOC] Fix doc for spark.sql.parquet.recordLevelFilter.enabled

## What changes were proposed in this pull request?

Updated the doc string value for spark.sql.parquet.recordLevelFilter.enabled to indicate that spark.sql.parquet.enableVectorizedReader must be disabled.

The code in ParquetFileFormat uses spark.sql.parquet.recordLevelFilter.enabled only after falling back to parquet-mr (see the else branch of this if statement):
https://github.com/apache/spark/blob/d5573c578a1eea9ee04886d9df37c7178e67bb30/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala#L412
https://github.com/apache/spark/blob/d5573c578a1eea9ee04886d9df37c7178e67bb30/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala#L427-L430

Tests also bear this out.

## How was this patch tested?

This is just a doc string fix: I built Spark and ran a single test.

Closes #22865 from bersprockets/confdocfix.

Authored-by: Bruce Robbins
Signed-off-by: Wenchen Fan
(cherry picked from commit 4e990d9dd2407dc257712c4b12b507f0990ca4e9)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b6ba0dd4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b6ba0dd4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b6ba0dd4

Branch: refs/heads/branch-2.4
Commit: b6ba0dd4773d4f5de02cbb49b70182ce94899671
Parents: 00771dc
Author: Bruce Robbins
Authored: Mon Oct 29 13:44:58 2018 +0800
Committer: Wenchen Fan
Committed: Mon Oct 29 13:45:23 2018 +0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/b6ba0dd4/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index 05264d3..08def90 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -442,7 +442,8 @@ object SQLConf {
   val PARQUET_RECORD_FILTER_ENABLED = buildConf("spark.sql.parquet.recordLevelFilter.enabled")
     .doc("If true, enables Parquet's native record-level filtering using the pushed down " +
       "filters. This configuration only has an effect when 'spark.sql.parquet.filterPushdown' " +
-      "is enabled.")
+      "is enabled and the vectorized reader is not used. You can ensure the vectorized reader " +
+      "is not used by setting 'spark.sql.parquet.enableVectorizedReader' to false.")
     .booleanConf
     .createWithDefault(false)
spark git commit: [DOC] Fix doc for spark.sql.parquet.recordLevelFilter.enabled
Repository: spark
Updated Branches:
  refs/heads/master ca2fca143 -> 4e990d9dd

[DOC] Fix doc for spark.sql.parquet.recordLevelFilter.enabled

## What changes were proposed in this pull request?

Updated the doc string value for spark.sql.parquet.recordLevelFilter.enabled to indicate that spark.sql.parquet.enableVectorizedReader must be disabled.

The code in ParquetFileFormat uses spark.sql.parquet.recordLevelFilter.enabled only after falling back to parquet-mr (see the else branch of this if statement):
https://github.com/apache/spark/blob/d5573c578a1eea9ee04886d9df37c7178e67bb30/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala#L412
https://github.com/apache/spark/blob/d5573c578a1eea9ee04886d9df37c7178e67bb30/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala#L427-L430

Tests also bear this out.

## How was this patch tested?

This is just a doc string fix: I built Spark and ran a single test.

Closes #22865 from bersprockets/confdocfix.

Authored-by: Bruce Robbins
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e990d9d
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e990d9d
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e990d9d

Branch: refs/heads/master
Commit: 4e990d9dd2407dc257712c4b12b507f0990ca4e9
Parents: ca2fca1
Author: Bruce Robbins
Authored: Mon Oct 29 13:44:58 2018 +0800
Committer: Wenchen Fan
Committed: Mon Oct 29 13:44:58 2018 +0800

----------------------------------------------------------------------
 .../src/main/scala/org/apache/spark/sql/internal/SQLConf.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/4e990d9d/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index e852955..4edffce 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -462,7 +462,8 @@ object SQLConf {
   val PARQUET_RECORD_FILTER_ENABLED = buildConf("spark.sql.parquet.recordLevelFilter.enabled")
     .doc("If true, enables Parquet's native record-level filtering using the pushed down " +
       "filters. This configuration only has an effect when 'spark.sql.parquet.filterPushdown' " +
-      "is enabled.")
+      "is enabled and the vectorized reader is not used. You can ensure the vectorized reader " +
+      "is not used by setting 'spark.sql.parquet.enableVectorizedReader' to false.")
     .booleanConf
     .createWithDefault(false)
svn commit: r30477 - in /dev/spark/2.3.3-SNAPSHOT-2018_10_28_22_02-3e0160b-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Mon Oct 29 05:16:34 2018
New Revision: 30477

Log:
Apache Spark 2.3.3-SNAPSHOT-2018_10_28_22_02-3e0160b docs

[This commit notification would consist of 1443 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25797][SQL][DOCS][BACKPORT-2.3] Add migration doc for solving issues caused by view canonicalization approach change
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 53aeb3d65 -> 3e0160bac

[SPARK-25797][SQL][DOCS][BACKPORT-2.3] Add migration doc for solving issues caused by view canonicalization approach change

## What changes were proposed in this pull request?

Since Spark 2.2, view definitions are stored in a different way from prior versions. This may cause Spark to be unable to read views created by prior versions. See [SPARK-25797](https://issues.apache.org/jira/browse/SPARK-25797) for more details.

Basically, we have 2 options:
1) Make Spark 2.2+ able to get older view definitions back. Since the expanded text is buggy and unusable, we have to use the original text (this is possible with [SPARK-25459](https://issues.apache.org/jira/browse/SPARK-25459)). However, because older Spark versions don't save the context for the database, we cannot always get correct view definitions without the view default database.
2) Recreate the views by `ALTER VIEW AS` or `CREATE OR REPLACE VIEW AS`.

This PR aims to add a migration doc to help users troubleshoot this issue via option 2 above.

## How was this patch tested?

N/A. Docs are generated and checked locally:

```
cd docs
SKIP_API=1 jekyll serve --watch
```

Closes #22851 from seancxmao/SPARK-25797-2.3.

Authored-by: seancxmao
Signed-off-by: Dongjoon Hyun

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3e0160ba
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3e0160ba
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3e0160ba

Branch: refs/heads/branch-2.3
Commit: 3e0160bacfbe4597f15ca410ca832617cdeeddca
Parents: 53aeb3d
Author: seancxmao
Authored: Sun Oct 28 21:27:22 2018 -0700
Committer: Dongjoon Hyun
Committed: Sun Oct 28 21:27:22 2018 -0700

----------------------------------------------------------------------
 docs/sql-programming-guide.md | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/3e0160ba/docs/sql-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 461806a..e5fa4c6 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1973,6 +1973,8 @@ working with timestamps in `pandas_udf`s to get the best performance, see

   - Since Spark 2.2.1 and 2.3.0, the schema is always inferred at runtime when the data source tables have the columns that exist in both partition schema and data schema. The inferred schema does not have the partitioned columns. When reading the table, Spark respects the partition values of these overlapping columns instead of the values stored in the data source files. In 2.2.0 and 2.1.x release, the inferred schema is partitioned but the data of the table is invisible to users (i.e., the result set is empty).

+  - Since Spark 2.2, view definitions are stored in a different way from prior versions. This may cause Spark to be unable to read views created by prior versions. In such cases, you need to recreate the views using `ALTER VIEW AS` or `CREATE OR REPLACE VIEW AS` with newer Spark versions.
+
 ## Upgrading From Spark SQL 2.0 to 2.1

  - Datasource tables now store partition metadata in the Hive metastore. This means that Hive DDLs such as `ALTER TABLE PARTITION ... SET LOCATION` are now available for tables created with the Datasource API.
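A minimal sketch of option 2 above, recreating an old view so that Spark 2.2+ stores its definition in the new format. The `CREATE OR REPLACE VIEW AS` statement is the one the migration note recommends; the view name and query are illustrative, and a Hive-enabled session is assumed for persistent views:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.enableHiveSupport().getOrCreate()

# Redefine the pre-2.2 view in place; Spark rewrites the stored view
# definition using the post-2.2 canonicalization approach.
spark.sql("""
  CREATE OR REPLACE VIEW sales_summary AS
  SELECT region, SUM(amount) AS total
  FROM sales
  GROUP BY region
""")
```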
spark git commit: [SPARK-25797][SQL][DOCS][BACKPORT-2.3] Add migration doc for solving issues caused by view canonicalization approach change
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 5b1396596 -> 17d882adf

[SPARK-25797][SQL][DOCS][BACKPORT-2.3] Add migration doc for solving issues caused by view canonicalization approach change

## What changes were proposed in this pull request?

Since Spark 2.2, view definitions are stored in a different way from prior versions. This may cause Spark to be unable to read views created by prior versions. See [SPARK-25797](https://issues.apache.org/jira/browse/SPARK-25797) for more details.

Basically, we have 2 options:
1) Make Spark 2.2+ able to get older view definitions back. Since the expanded text is buggy and unusable, we have to use the original text (this is possible with [SPARK-25459](https://issues.apache.org/jira/browse/SPARK-25459)). However, because older Spark versions don't save the context for the database, we cannot always get correct view definitions without the view default database.
2) Recreate the views by `ALTER VIEW AS` or `CREATE OR REPLACE VIEW AS`.

This PR aims to add a migration doc to help users troubleshoot this issue via option 2 above.

## How was this patch tested?

N/A. Docs are generated and checked locally:

```
cd docs
SKIP_API=1 jekyll serve --watch
```

Closes #22851 from seancxmao/SPARK-25797-2.3.

Authored-by: seancxmao
Signed-off-by: Dongjoon Hyun
(cherry picked from commit 3e0160bacfbe4597f15ca410ca832617cdeeddca)
Signed-off-by: Dongjoon Hyun

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/17d882ad
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/17d882ad
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/17d882ad

Branch: refs/heads/branch-2.2
Commit: 17d882adf0b1bbbd4350b6d46756fab0fd602683
Parents: 5b13965
Author: seancxmao
Authored: Sun Oct 28 21:27:22 2018 -0700
Committer: Dongjoon Hyun
Committed: Sun Oct 28 21:27:42 2018 -0700

----------------------------------------------------------------------
 docs/sql-programming-guide.md | 2 ++
 1 file changed, 2 insertions(+)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/17d882ad/docs/sql-programming-guide.md
----------------------------------------------------------------------
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 8cd4d05..758920e 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -1548,6 +1548,8 @@ options.

   - Spark 2.1.1 introduced a new configuration key: `spark.sql.hive.caseSensitiveInferenceMode`. It had a default setting of `NEVER_INFER`, which kept behavior identical to 2.1.0. However, Spark 2.2.0 changes this setting's default value to `INFER_AND_SAVE` to restore compatibility with reading Hive metastore tables whose underlying file schema have mixed-case column names. With the `INFER_AND_SAVE` configuration value, on first access Spark will perform schema inference on any Hive metastore table for which it has not already saved an inferred schema. Note that schema inference can be a very time consuming operation for tables with thousands of partitions. If compatibility with mixed-case column names is not a concern, you can safely set `spark.sql.hive.caseSensitiveInferenceMode` to `NEVER_INFER` to avoid the initial overhead of schema inference. Note that with the new default `INFER_AND_SAVE` setting, the results of the schema inference are saved as a metastore key for future use. Therefore, the initial schema inference occurs only at a table's first access.

+  - Since Spark 2.2, view definitions are stored in a different way from prior versions. This may cause Spark to be unable to read views created by prior versions. In such cases, you need to recreate the views using `ALTER VIEW AS` or `CREATE OR REPLACE VIEW AS` with newer Spark versions.
+
 ## Upgrading From Spark SQL 2.0 to 2.1

  - Datasource tables now store partition metadata in the Hive metastore. This means that Hive DDLs such as `ALTER TABLE PARTITION ... SET LOCATION` are now available for tables created with the Datasource API.
svn commit: r30476 - in /dev/spark/3.0.0-SNAPSHOT-2018_10_28_20_03-ca2fca1-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Mon Oct 29 03:17:29 2018
New Revision: 30476

Log:
Apache Spark 3.0.0-SNAPSHOT-2018_10_28_20_03-ca2fca1 docs

[This commit notification would consist of 1472 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
[spark] Git Push Summary
Repository: spark
Updated Tags:  refs/tags/v2.4.0-rc5 [deleted] 4a7ead480
svn commit: r30474 - in /dev/spark/2.4.1-SNAPSHOT-2018_10_28_18_03-00771dc-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Mon Oct 29 01:19:30 2018
New Revision: 30474

Log:
Apache Spark 2.4.1-SNAPSHOT-2018_10_28_18_03-00771dc docs

[This commit notification would consist of 1477 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r30473 - in /dev/spark/2.3.3-SNAPSHOT-2018_10_28_18_02-53aeb3d-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Mon Oct 29 01:17:43 2018
New Revision: 30473

Log:
Apache Spark 2.3.3-SNAPSHOT-2018_10_28_18_02-53aeb3d docs

[This commit notification would consist of 1443 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25816][SQL] Fix attribute resolution in nested extractors
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 3afb3a20e -> 53aeb3d65

[SPARK-25816][SQL] Fix attribute resolution in nested extractors

Extractors are made of 2 expressions: one of them defines the value to be extracted from (called `child`) and the other defines the way of extraction (called `extraction`). In this sense extractors have 2 children, so they shouldn't be `UnaryExpression`s.

`ResolveReferences` was changed in this commit: https://github.com/apache/spark/commit/36b826f5d17ae7be89135cb2c43ff797f9e7fe48 which resulted in a regression with nested extractors. An extractor needs to define its children as the set of both `child` and `extraction`, and should try to resolve both in `ResolveReferences`.

This PR changes `UnresolvedExtractValue` to a `BinaryExpression`.

added UT

Closes #22817 from peter-toth/SPARK-25816.

Authored-by: Peter Toth
Signed-off-by: gatorsmile
(cherry picked from commit ca2fca143277deaff58a69b7f1e0360cfc70561f)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/53aeb3d6
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/53aeb3d6
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/53aeb3d6

Branch: refs/heads/branch-2.3
Commit: 53aeb3d6587a04b0b7f7e454fa3e2a88aee1ba98
Parents: 3afb3a2
Author: Peter Toth
Authored: Sun Oct 28 17:51:35 2018 -0700
Committer: gatorsmile
Committed: Sun Oct 28 17:53:35 2018 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/analysis/unresolved.scala | 5 -
 .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 7 +++
 2 files changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/53aeb3d6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index a65f58f..cc07a24 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -378,7 +378,10 @@ case class ResolvedStar(expressions: Seq[NamedExpression]) extends Star with Une
  * can be key of Map, index of Array, field name of Struct.
  */
 case class UnresolvedExtractValue(child: Expression, extraction: Expression)
-  extends UnaryExpression with Unevaluable {
+  extends BinaryExpression with Unevaluable {
+
+  override def left: Expression = child
+  override def right: Expression = extraction

   override def dataType: DataType = throw new UnresolvedException(this, "dataType")
   override def foldable: Boolean = throw new UnresolvedException(this, "foldable")

http://git-wip-us.apache.org/repos/asf/spark/blob/53aeb3d6/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index ced53ba..2cb1bf9 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2334,4 +2334,11 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     checkAnswer(df.where("(NOT a) OR a"), Seq.empty)
   }
+
+  test("SPARK-25816 ResolveReferences works with nested extractors") {
+    val df = Seq((1, Map(1 -> "a")), (2, Map(2 -> "b"))).toDF("key", "map")
+    val swappedDf = df.select($"key".as("map"), $"map".as("key"))
+
+    checkAnswer(swappedDf.filter($"key"($"map") > "a"), Row(2, Map(2 -> "b")))
+  }
 }
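For intuition, a hedged PySpark sketch of the query shape the new test exercises: an extractor whose `child` and `extraction` both need re-resolution after the column aliases are swapped. The data and names mirror the Scala test above, and a build containing this fix is assumed:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()

df = spark.createDataFrame([(1, {1: "a"}), (2, {2: "b"})], ["key", "map"])

# Swap the column names so that resolving the extractor must re-resolve
# both the map being extracted from and the extraction key.
swapped = df.select(df["key"].alias("map"), df["map"].alias("key"))

# "key" is now the map column and "map" the integer column; with the fix
# this resolves correctly and returns the row (2, {2: "b"}).
swapped.filter(swapped["key"][swapped["map"]] > "a").show()
```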
spark git commit: [SPARK-25816][SQL] Fix attribute resolution in nested extractors
Repository: spark
Updated Branches:
  refs/heads/master 4427a96bc -> ca2fca143

[SPARK-25816][SQL] Fix attribute resolution in nested extractors

## What changes were proposed in this pull request?

Extractors are made of 2 expressions: one of them defines the value to be extracted from (called `child`) and the other defines the way of extraction (called `extraction`). In this sense extractors have 2 children, so they shouldn't be `UnaryExpression`s.

`ResolveReferences` was changed in this commit: https://github.com/apache/spark/commit/36b826f5d17ae7be89135cb2c43ff797f9e7fe48 which resulted in a regression with nested extractors. An extractor needs to define its children as the set of both `child` and `extraction`, and should try to resolve both in `ResolveReferences`.

This PR changes `UnresolvedExtractValue` to a `BinaryExpression`.

## How was this patch tested?

added UT

Closes #22817 from peter-toth/SPARK-25816.

Authored-by: Peter Toth
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ca2fca14
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ca2fca14
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ca2fca14

Branch: refs/heads/master
Commit: ca2fca143277deaff58a69b7f1e0360cfc70561f
Parents: 4427a96
Author: Peter Toth
Authored: Sun Oct 28 17:51:35 2018 -0700
Committer: gatorsmile
Committed: Sun Oct 28 17:51:35 2018 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/analysis/unresolved.scala | 5 -
 .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 7 +++
 2 files changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/ca2fca14/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index c1ec736..857cf38 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -407,7 +407,10 @@ case class ResolvedStar(expressions: Seq[NamedExpression]) extends Star with Une
  * can be key of Map, index of Array, field name of Struct.
  */
 case class UnresolvedExtractValue(child: Expression, extraction: Expression)
-  extends UnaryExpression with Unevaluable {
+  extends BinaryExpression with Unevaluable {
+
+  override def left: Expression = child
+  override def right: Expression = extraction

   override def dataType: DataType = throw new UnresolvedException(this, "dataType")
   override def foldable: Boolean = throw new UnresolvedException(this, "foldable")

http://git-wip-us.apache.org/repos/asf/spark/blob/ca2fca14/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index 3f9af29..a430884 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2578,4 +2578,11 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
       Row ("abc", 1))
     }
   }
+
+  test("SPARK-25816 ResolveReferences works with nested extractors") {
+    val df = Seq((1, Map(1 -> "a")), (2, Map(2 -> "b"))).toDF("key", "map")
+    val swappedDf = df.select($"key".as("map"), $"map".as("key"))
+
+    checkAnswer(swappedDf.filter($"key"($"map") > "a"), Row(2, Map(2 -> "b")))
+  }
 }
spark git commit: [SPARK-25816][SQL] Fix attribute resolution in nested extractors
Repository: spark
Updated Branches:
  refs/heads/branch-2.4 0f74bac64 -> 00771dced

[SPARK-25816][SQL] Fix attribute resolution in nested extractors

## What changes were proposed in this pull request?

Extractors are made of 2 expressions: one of them defines the value to be extracted from (called `child`) and the other defines the way of extraction (called `extraction`). In this sense extractors have 2 children, so they shouldn't be `UnaryExpression`s.

`ResolveReferences` was changed in this commit: https://github.com/apache/spark/commit/36b826f5d17ae7be89135cb2c43ff797f9e7fe48 which resulted in a regression with nested extractors. An extractor needs to define its children as the set of both `child` and `extraction`, and should try to resolve both in `ResolveReferences`.

This PR changes `UnresolvedExtractValue` to a `BinaryExpression`.

## How was this patch tested?

added UT

Closes #22817 from peter-toth/SPARK-25816.

Authored-by: Peter Toth
Signed-off-by: gatorsmile
(cherry picked from commit ca2fca143277deaff58a69b7f1e0360cfc70561f)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/00771dce
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/00771dce
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/00771dce

Branch: refs/heads/branch-2.4
Commit: 00771dced9c73cddfc6325b3ffb00b32864a02a3
Parents: 0f74bac
Author: Peter Toth
Authored: Sun Oct 28 17:51:35 2018 -0700
Committer: gatorsmile
Committed: Sun Oct 28 17:51:53 2018 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/catalyst/analysis/unresolved.scala | 5 -
 .../src/test/scala/org/apache/spark/sql/DataFrameSuite.scala | 7 +++
 2 files changed, 11 insertions(+), 1 deletion(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/00771dce/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
index c1ec736..857cf38 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/unresolved.scala
@@ -407,7 +407,10 @@ case class ResolvedStar(expressions: Seq[NamedExpression]) extends Star with Une
  * can be key of Map, index of Array, field name of Struct.
  */
 case class UnresolvedExtractValue(child: Expression, extraction: Expression)
-  extends UnaryExpression with Unevaluable {
+  extends BinaryExpression with Unevaluable {
+
+  override def left: Expression = child
+  override def right: Expression = extraction

   override def dataType: DataType = throw new UnresolvedException(this, "dataType")
   override def foldable: Boolean = throw new UnresolvedException(this, "foldable")

http://git-wip-us.apache.org/repos/asf/spark/blob/00771dce/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
index e84cd8c..2ca0e5f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala
@@ -2590,4 +2590,11 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
       Row ("abc", 1))
     }
   }
+
+  test("SPARK-25816 ResolveReferences works with nested extractors") {
+    val df = Seq((1, Map(1 -> "a")), (2, Map(2 -> "b"))).toDF("key", "map")
+    val swappedDf = df.select($"key".as("map"), $"map".as("key"))
+
+    checkAnswer(swappedDf.filter($"key"($"map") > "a"), Row(2, Map(2 -> "b")))
+  }
 }
svn commit: r30472 - in /dev/spark/3.0.0-SNAPSHOT-2018_10_28_16_02-4427a96-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Sun Oct 28 23:17:10 2018
New Revision: 30472

Log:
Apache Spark 3.0.0-SNAPSHOT-2018_10_28_16_02-4427a96 docs

[This commit notification would consist of 1472 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25806][SQL] The instance of FileSplit is redundant
Repository: spark
Updated Branches:
  refs/heads/master a7ab7f234 -> 4427a96bc

[SPARK-25806][SQL] The instance of FileSplit is redundant

## What changes were proposed in this pull request?

The instance of `FileSplit` is redundant for the `ParquetFileFormat` and `hive/orc/OrcFileFormat` classes.

## How was this patch tested?

Existing unit tests in `ParquetQuerySuite.scala` and `HiveOrcQuerySuite.scala`

Closes #22802 from 10110346/FileSplitnotneed.

Authored-by: liuxian
Signed-off-by: Sean Owen

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4427a96b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4427a96b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4427a96b

Branch: refs/heads/master
Commit: 4427a96bcea625bc51fc5e0e999f170ad537a2fc
Parents: a7ab7f2
Author: liuxian
Authored: Sun Oct 28 17:39:16 2018 -0500
Committer: Sean Owen
Committed: Sun Oct 28 17:39:16 2018 -0500

----------------------------------------------------------------------
 .../datasources/parquet/ParquetFileFormat.scala | 13 +
 .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala | 3 +-
 2 files changed, 6 insertions(+), 10 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/4427a96b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
index ea4f159..f04502d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFileFormat.scala
@@ -352,17 +352,14 @@ class ParquetFileFormat
     (file: PartitionedFile) => {
       assert(file.partitionValues.numFields == partitionSchema.size)

-      val fileSplit =
-        new FileSplit(new Path(new URI(file.filePath)), file.start, file.length, Array.empty)
-      val filePath = fileSplit.getPath
-
+      val filePath = new Path(new URI(file.filePath))
       val split =
         new org.apache.parquet.hadoop.ParquetInputSplit(
           filePath,
-          fileSplit.getStart,
-          fileSplit.getStart + fileSplit.getLength,
-          fileSplit.getLength,
-          fileSplit.getLocations,
+          file.start,
+          file.start + file.length,
+          file.length,
+          Array.empty,
           null)

       val sharedConf = broadcastedHadoopConf.value.value

http://git-wip-us.apache.org/repos/asf/spark/blob/4427a96b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index de8085f..89e6ea8 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -154,13 +154,12 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable
       val job = Job.getInstance(conf)
       FileInputFormat.setInputPaths(job, file.filePath)

-      val fileSplit = new FileSplit(filePath, file.start, file.length, Array.empty)
       // Custom OrcRecordReader is used to get
       // ObjectInspector during recordReader creation itself and can
       // avoid NameNode call in unwrapOrcStructs per file.
       // Specifically would be helpful for partitioned datasets.
       val orcReader = OrcFile.createReader(filePath, OrcFile.readerOptions(conf))
-      new SparkOrcNewRecordReader(orcReader, conf, fileSplit.getStart, fileSplit.getLength)
+      new SparkOrcNewRecordReader(orcReader, conf, file.start, file.length)
     }

     val recordsIterator = new RecordReaderIterator[OrcStruct](orcRecordReader)
svn commit: r30461 - in /dev/spark/3.0.0-SNAPSHOT-2018_10_28_04_02-a7ab7f2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Sun Oct 28 11:16:54 2018
New Revision: 30461

Log:
Apache Spark 3.0.0-SNAPSHOT-2018_10_28_04_02-a7ab7f2 docs

[This commit notification would consist of 1472 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-25845][SQL] Fix MatchError for calendar interval type in range frame left boundary
Repository: spark
Updated Branches:
  refs/heads/master ff4bb836a -> a7ab7f234

[SPARK-25845][SQL] Fix MatchError for calendar interval type in range frame left boundary

## What changes were proposed in this pull request?

WindowSpecDefinition checks start < last, but CalendarIntervalType is not comparable, so it would throw the following exception at runtime:

```
scala.MatchError: CalendarIntervalType (of class org.apache.spark.sql.types.CalendarIntervalType$)
 at org.apache.spark.sql.catalyst.util.TypeUtils$.getInterpretedOrdering(TypeUtils.scala:58)
 at org.apache.spark.sql.catalyst.expressions.BinaryComparison.ordering$lzycompute(predicates.scala:592)
 at org.apache.spark.sql.catalyst.expressions.BinaryComparison.ordering(predicates.scala:592)
 at org.apache.spark.sql.catalyst.expressions.GreaterThan.nullSafeEval(predicates.scala:797)
 at org.apache.spark.sql.catalyst.expressions.BinaryExpression.eval(Expression.scala:496)
 at org.apache.spark.sql.catalyst.expressions.SpecifiedWindowFrame.isGreaterThan(windowExpressions.scala:245)
 at org.apache.spark.sql.catalyst.expressions.SpecifiedWindowFrame.checkInputDataTypes(windowExpressions.scala:216)
 at org.apache.spark.sql.catalyst.expressions.Expression.resolved$lzycompute(Expression.scala:171)
 at org.apache.spark.sql.catalyst.expressions.Expression.resolved(Expression.scala:171)
 at org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$childrenResolved$1.apply(Expression.scala:183)
 at org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$childrenResolved$1.apply(Expression.scala:183)
 at scala.collection.IndexedSeqOptimized$class.prefixLengthImpl(IndexedSeqOptimized.scala:38)
 at scala.collection.IndexedSeqOptimized$class.forall(IndexedSeqOptimized.scala:43)
 at scala.collection.mutable.ArrayBuffer.forall(ArrayBuffer.scala:48)
 at org.apache.spark.sql.catalyst.expressions.Expression.childrenResolved(Expression.scala:183)
 at org.apache.spark.sql.catalyst.expressions.WindowSpecDefinition.resolved$lzycompute(windowExpressions.scala:48)
 at org.apache.spark.sql.catalyst.expressions.WindowSpecDefinition.resolved(windowExpressions.scala:48)
 at org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$childrenResolved$1.apply(Expression.scala:183)
 at org.apache.spark.sql.catalyst.expressions.Expression$$anonfun$childrenResolved$1.apply(Expression.scala:183)
 at scala.collection.LinearSeqOptimized$class.forall(LinearSeqOptimized.scala:83)
```

We fix the issue by only performing the check on boundary expressions that are of AtomicType.

## How was this patch tested?

Add new test case in `DataFrameWindowFramesSuite`

Closes #22853 from jiangxb1987/windowBoundary.

Authored-by: Xingbo Jiang
Signed-off-by: Xingbo Jiang

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a7ab7f23
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a7ab7f23
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a7ab7f23

Branch: refs/heads/master
Commit: a7ab7f2348cfcd665f7815f5a9ae4d9a48383b5d
Parents: ff4bb83
Author: Xingbo Jiang
Authored: Sun Oct 28 18:15:47 2018 +0800
Committer: Xingbo Jiang
Committed: Sun Oct 28 18:15:47 2018 +0800

----------------------------------------------------------------------
 .../catalyst/expressions/windowExpressions.scala | 8 ++--
 .../spark/sql/DataFrameWindowFramesSuite.scala | 19 +++
 2 files changed, 25 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/a7ab7f23/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
index 707f312..7de6ddd 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala
@@ -242,8 +242,12 @@ case class SpecifiedWindowFrame(
     case e: Expression => e.sql + " FOLLOWING"
   }

-  private def isGreaterThan(l: Expression, r: Expression): Boolean = {
-    GreaterThan(l, r).eval().asInstanceOf[Boolean]
+  // Check whether the left boundary value is greater than the right boundary value. It's required
+  // that both expressions have the same data type.
+  // Since CalendarIntervalType is not comparable, we only compare expressions that are AtomicType.
+  private def isGreaterThan(l: Expression, r: Expression): Boolean = l.dataType match {
+    case _: AtomicType => GreaterThan(l, r).eval().asInstanceOf[Boolean]
+    case _ => false
   }

   private def checkBoundary(b: Expression, location: Stri
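A hedged sketch of the kind of query this fix unblocks: a range frame whose left boundary is a calendar interval over a timestamp ordering column. The table, column names, and exact frame are illustrative, and Spark's interval range-frame SQL syntax (available since 2.3) is assumed:

```python
from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
spark.createDataFrame(
    [("2018-10-01 00:00:00", 1.0), ("2018-10-02 00:00:00", 2.0)],
    ["ts", "amount"],
).createOrReplaceTempView("events")

# Interval boundaries are not comparable, so with the fix the boundary
# sanity check is skipped for them instead of raising a MatchError.
spark.sql("""
  SELECT ts, SUM(amount) OVER (
    ORDER BY CAST(ts AS TIMESTAMP)
    RANGE BETWEEN INTERVAL 1 DAY PRECEDING AND CURRENT ROW
  ) AS rolling_amount
  FROM events
""").show()
```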
svn commit: r30458 - in /dev/spark/3.0.0-SNAPSHOT-2018_10_28_00_02-ff4bb83-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Sun Oct 28 07:17:06 2018
New Revision: 30458

Log:
Apache Spark 3.0.0-SNAPSHOT-2018_10_28_00_02-ff4bb83 docs

[This commit notification would consist of 1472 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]