spark git commit: [SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and improvement
Repository: spark Updated Branches: refs/heads/branch-2.0 d11f533de -> 19397caab [SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and improvement ## What changes were proposed in this pull request? This PR is the follow-up PR for https://github.com/apache/spark/pull/13754/files and https://github.com/apache/spark/pull/13749. I will comment inline to explain my changes. ## How was this patch tested? Existing tests. Author: Yin Huai Closes #13766 from yhuai/caseSensitivity. (cherry picked from commit 6d0f921aedfdd3b7e8472b6776d0c7d8299190bd) Signed-off-by: Yin Huai Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/19397caa Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/19397caa Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/19397caa Branch: refs/heads/branch-2.0 Commit: 19397caab62b550269961a123bd11a34afc3a09b Parents: d11f533 Author: Yin Huai Authored: Sun Jun 19 21:45:53 2016 -0700 Committer: Yin Huai Committed: Sun Jun 19 21:46:14 2016 -0700 -- .../plans/logical/basicLogicalOperators.scala | 2 ++ .../org/apache/spark/sql/DataFrameWriter.scala | 24 -- .../sql/execution/datasources/DataSource.scala | 9 ++--- .../spark/sql/execution/datasources/rules.scala | 13 ++-- .../spark/sql/internal/SessionState.scala | 2 +- .../spark/sql/execution/command/DDLSuite.scala | 20 +-- .../spark/sql/hive/HiveSessionState.scala | 2 +- .../sql/hive/InsertIntoHiveTableSuite.scala | 20 +++ .../sql/hive/execution/HiveQuerySuite.scala | 35 .../sql/hive/execution/SQLQuerySuite.scala | 32 -- 10 files changed, 98 insertions(+), 61 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/19397caa/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 69b8b05..ff3dcbc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -369,6 +369,8 @@ case class InsertIntoTable( if (table.output.isEmpty) { None } else { + // Note: The parser (visitPartitionSpec in AstBuilder) already turns + // keys in partition to their lowercase forms. val staticPartCols = partition.filter(_._2.isDefined).keySet Some(table.output.filterNot(a => staticPartCols.contains(a.name))) } http://git-wip-us.apache.org/repos/asf/spark/blob/19397caa/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index e6fc974..ca3972d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -245,29 +245,17 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { if (partitioningColumns.isDefined) { throw new AnalysisException( "insertInto() can't be used together with partitionBy(). " + - "Partition columns are defined by the table into which is being inserted." + "Partition columns have already be defined for the table. " + + "It is not necessary to use partitionBy()." 
) } -val partitions = normalizedParCols.map(_.map(col => col -> Option.empty[String]).toMap) -val overwrite = mode == SaveMode.Overwrite - -// A partitioned relation's schema can be different from the input logicalPlan, since -// partition columns are all moved after data columns. We Project to adjust the ordering. -// TODO: this belongs to the analyzer. -val input = normalizedParCols.map { parCols => - val (inputPartCols, inputDataCols) = df.logicalPlan.output.partition { attr => -parCols.contains(attr.name) - } - Project(inputDataCols ++ inputPartCols, df.logicalPlan) -}.getOrElse(df.logicalPlan) - df.sparkSession.sessionState.executePlan( InsertIntoTable( -UnresolvedRelation(tableIdent), -partitions.getOrElse(Map.empty[String, Option[String]]), -input, -overwrite, +table = UnresolvedRelation(tableIdent), +partition = Map.empty[String, Option[String]], +child = df.logicalP
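The new comment in the diff above is worth illustrating. A minimal sketch (table and column names are invented; assumes a running SparkSession `spark` and a table `events` partitioned by `ds`) of the parser behavior it documents: visitPartitionSpec lower-cases partition keys, so both statements below resolve the same static partition column `ds`.

    // Hypothetical tables; both statements yield the static partition key "ds"
    // because the parser lower-cases partition-spec keys.
    spark.sql("INSERT OVERWRITE TABLE events PARTITION (ds='2016-06-19') SELECT id, value FROM staging")
    spark.sql("INSERT OVERWRITE TABLE events PARTITION (DS='2016-06-19') SELECT id, value FROM staging")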
spark git commit: [SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and improvement
Repository: spark Updated Branches: refs/heads/master 4f17fddcd -> 6d0f921ae [SPARK-16036][SPARK-16037][SPARK-16034][SQL] Follow up code clean up and improvement ## What changes were proposed in this pull request? This PR is the follow-up PR for https://github.com/apache/spark/pull/13754/files and https://github.com/apache/spark/pull/13749. I will comment inline to explain my changes. ## How was this patch tested? Existing tests. Author: Yin Huai Closes #13766 from yhuai/caseSensitivity. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6d0f921a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6d0f921a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6d0f921a Branch: refs/heads/master Commit: 6d0f921aedfdd3b7e8472b6776d0c7d8299190bd Parents: 4f17fdd Author: Yin Huai Authored: Sun Jun 19 21:45:53 2016 -0700 Committer: Yin Huai Committed: Sun Jun 19 21:45:53 2016 -0700 -- .../plans/logical/basicLogicalOperators.scala | 2 ++ .../org/apache/spark/sql/DataFrameWriter.scala | 24 -- .../sql/execution/datasources/DataSource.scala | 9 ++--- .../spark/sql/execution/datasources/rules.scala | 13 ++-- .../spark/sql/internal/SessionState.scala | 2 +- .../spark/sql/execution/command/DDLSuite.scala | 20 +-- .../spark/sql/hive/HiveSessionState.scala | 2 +- .../sql/hive/InsertIntoHiveTableSuite.scala | 20 +++ .../sql/hive/execution/HiveQuerySuite.scala | 35 .../sql/hive/execution/SQLQuerySuite.scala | 32 -- 10 files changed, 98 insertions(+), 61 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/6d0f921a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala index 69b8b05..ff3dcbc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala @@ -369,6 +369,8 @@ case class InsertIntoTable( if (table.output.isEmpty) { None } else { + // Note: The parser (visitPartitionSpec in AstBuilder) already turns + // keys in partition to their lowercase forms. val staticPartCols = partition.filter(_._2.isDefined).keySet Some(table.output.filterNot(a => staticPartCols.contains(a.name))) } http://git-wip-us.apache.org/repos/asf/spark/blob/6d0f921a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index e6fc974..ca3972d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -245,29 +245,17 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) { if (partitioningColumns.isDefined) { throw new AnalysisException( "insertInto() can't be used together with partitionBy(). " + - "Partition columns are defined by the table into which is being inserted." + "Partition columns have already be defined for the table. " + + "It is not necessary to use partitionBy()." 
) } -val partitions = normalizedParCols.map(_.map(col => col -> Option.empty[String]).toMap) -val overwrite = mode == SaveMode.Overwrite - -// A partitioned relation's schema can be different from the input logicalPlan, since -// partition columns are all moved after data columns. We Project to adjust the ordering. -// TODO: this belongs to the analyzer. -val input = normalizedParCols.map { parCols => - val (inputPartCols, inputDataCols) = df.logicalPlan.output.partition { attr => -parCols.contains(attr.name) - } - Project(inputDataCols ++ inputPartCols, df.logicalPlan) -}.getOrElse(df.logicalPlan) - df.sparkSession.sessionState.executePlan( InsertIntoTable( -UnresolvedRelation(tableIdent), -partitions.getOrElse(Map.empty[String, Option[String]]), -input, -overwrite, +table = UnresolvedRelation(tableIdent), +partition = Map.empty[String, Option[String]], +child = df.logicalPlan, +overwrite = mode == SaveMode.Overwrite, ifNotExists = false)).toRdd } http
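A hedged sketch of the contract the reworded error message describes (table and column names are invented; assumes spark-shell with a SparkSession `spark`): insertInto() now rejects an explicit partitionBy(), because the target table already fixes its partition columns.

    import org.apache.spark.sql.SaveMode
    import spark.implicits._

    val df = Seq((1, "a", "2016-06-19")).toDF("id", "value", "ds")

    // Throws AnalysisException: partition columns come from the table itself.
    // df.write.partitionBy("ds").insertInto("events")

    // Correct: rely on the partitioning already defined on "events".
    df.write.mode(SaveMode.Overwrite).insertInto("events")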
spark git commit: [SPARK-16031] Add debug-only socket source in Structured Streaming
Repository: spark Updated Branches: refs/heads/master 5930d7a2e -> 4f17fddcd [SPARK-16031] Add debug-only socket source in Structured Streaming ## What changes were proposed in this pull request? This patch adds a text-based socket source similar to the one in Spark Streaming for debugging and tutorials. The source is clearly marked as debug-only so that users don't try to run it in production applications, because this type of source cannot provide HA without storing a lot of state in Spark. ## How was this patch tested? Unit tests and manual tests in spark-shell. Author: Matei Zaharia Closes #13748 from mateiz/socket-source. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4f17fddc Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4f17fddc Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4f17fddc Branch: refs/heads/master Commit: 4f17fddcd57adeae0d7e31bd14423283d4b625e9 Parents: 5930d7a Author: Matei Zaharia Authored: Sun Jun 19 21:27:04 2016 -0700 Committer: Reynold Xin Committed: Sun Jun 19 21:27:04 2016 -0700 -- apache.spark.sql.sources.DataSourceRegister | 1 + .../execution/streaming/FileStreamSource.scala | 2 + .../spark/sql/execution/streaming/Source.scala | 3 + .../execution/streaming/StreamExecution.scala | 1 + .../spark/sql/execution/streaming/memory.scala | 2 + .../spark/sql/execution/streaming/socket.scala | 144 +++ .../streaming/TextSocketStreamSuite.scala | 136 ++ .../spark/sql/streaming/StreamSuite.scala | 2 + .../test/DataStreamReaderWriterSuite.scala | 2 + 9 files changed, 293 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4f17fddc/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister -- diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 9f8bb5d..27d32b5 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -4,3 +4,4 @@ org.apache.spark.sql.execution.datasources.json.JsonFileFormat org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat org.apache.spark.sql.execution.datasources.text.TextFileFormat org.apache.spark.sql.execution.streaming.ConsoleSinkProvider +org.apache.spark.sql.execution.streaming.TextSocketSourceProvider http://git-wip-us.apache.org/repos/asf/spark/blob/4f17fddc/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index bef5616..9886ad0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -128,4 +128,6 @@ class FileStreamSource( override def getOffset: Option[Offset] = Some(fetchMaxOffset()).filterNot(_.offset == -1) override def toString: String = s"FileStreamSource[$qualifiedBasePath]" + + override def stop() {} } http://git-wip-us.apache.org/repos/asf/spark/blob/4f17fddc/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala -- diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala index 14450c2..9711478 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala @@ -39,4 +39,7 @@ trait Source { * same data for a particular `start` and `end` pair. */ def getBatch(start: Option[Offset], end: Offset): DataFrame + + /** Stop this source and free any resources it has allocated. */ + def stop(): Unit } http://git-wip-us.apache.org/repos/asf/spark/blob/4f17fddc/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala index 4a
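A usage sketch for the new source (host and port values are illustrative; "socket" is the short name registered through the DataSourceRegister entry above). Start a server first, e.g. `nc -lk 9999`, then in spark-shell:

    // Assumes a running SparkSession `spark`.
    val lines = spark.readStream
      .format("socket")
      .option("host", "localhost")
      .option("port", 9999)
      .load()

    // Echo incoming lines for debugging; this source is deliberately not HA
    // and so unsuitable for production pipelines.
    val query = lines.writeStream.format("console").start()
    // ... later: stopping the query invokes the new Source.stop() hook.
    query.stop()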
spark git commit: [SPARK-16031] Add debug-only socket source in Structured Streaming
Repository: spark Updated Branches: refs/heads/branch-2.0 80c6d4e3a -> d11f533de [SPARK-16031] Add debug-only socket source in Structured Streaming ## What changes were proposed in this pull request? This patch adds a text-based socket source similar to the one in Spark Streaming for debugging and tutorials. The source is clearly marked as debug-only so that users don't try to run it in production applications, because this type of source cannot provide HA without storing a lot of state in Spark. ## How was this patch tested? Unit tests and manual tests in spark-shell. Author: Matei Zaharia Closes #13748 from mateiz/socket-source. (cherry picked from commit 4f17fddcd57adeae0d7e31bd14423283d4b625e9) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d11f533d Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d11f533d Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d11f533d Branch: refs/heads/branch-2.0 Commit: d11f533ded502c6cc4a129e201362bca6e302028 Parents: 80c6d4e Author: Matei Zaharia Authored: Sun Jun 19 21:27:04 2016 -0700 Committer: Reynold Xin Committed: Sun Jun 19 21:27:11 2016 -0700 -- apache.spark.sql.sources.DataSourceRegister | 1 + .../execution/streaming/FileStreamSource.scala | 2 + .../spark/sql/execution/streaming/Source.scala | 3 + .../execution/streaming/StreamExecution.scala | 1 + .../spark/sql/execution/streaming/memory.scala | 2 + .../spark/sql/execution/streaming/socket.scala | 144 +++ .../streaming/TextSocketStreamSuite.scala | 136 ++ .../spark/sql/streaming/StreamSuite.scala | 2 + .../test/DataStreamReaderWriterSuite.scala | 2 + 9 files changed, 293 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d11f533d/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister -- diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister index 9f8bb5d..27d32b5 100644 --- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister +++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister @@ -4,3 +4,4 @@ org.apache.spark.sql.execution.datasources.json.JsonFileFormat org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat org.apache.spark.sql.execution.datasources.text.TextFileFormat org.apache.spark.sql.execution.streaming.ConsoleSinkProvider +org.apache.spark.sql.execution.streaming.TextSocketSourceProvider http://git-wip-us.apache.org/repos/asf/spark/blob/d11f533d/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala index bef5616..9886ad0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/FileStreamSource.scala @@ -128,4 +128,6 @@ class FileStreamSource( override def getOffset: Option[Offset] = Some(fetchMaxOffset()).filterNot(_.offset == -1) override def toString: String = s"FileStreamSource[$qualifiedBasePath]" + + override def stop() {} } 
http://git-wip-us.apache.org/repos/asf/spark/blob/d11f533d/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala index 14450c2..9711478 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/Source.scala @@ -39,4 +39,7 @@ trait Source { * same data for a particular `start` and `end` pair. */ def getBatch(start: Option[Offset], end: Offset): DataFrame + + /** Stop this source and free any resources it has allocated. */ + def stop(): Unit } http://git-wip-us.apache.org/repos/asf/spark/blob/d11f533d/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/StreamExecution
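For third-party source authors, a minimal compilable sketch (class name and constructor are invented) of honoring the new lifecycle hook shown in the Source trait above: StreamExecution calls stop() when a query terminates, so the source can release whatever it holds.

    import java.net.Socket

    import org.apache.spark.sql.DataFrame
    import org.apache.spark.sql.execution.streaming.{Offset, Source}
    import org.apache.spark.sql.types.StructType

    // Hypothetical source owning a network connection.
    class ClosableSource(socket: Socket, override val schema: StructType) extends Source {
      override def getOffset: Option[Offset] = None  // no data in this sketch
      override def getBatch(start: Option[Offset], end: Offset): DataFrame =
        throw new UnsupportedOperationException("sketch only")
      // Release the connection when the streaming query stops.
      override def stop(): Unit = socket.close()
    }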
[1/2] spark git commit: Preparing Spark release v1.6.2-rc2
Repository: spark Updated Branches: refs/heads/branch-1.6 3d569d9ea -> 208348595 Preparing Spark release v1.6.2-rc2 Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54b1121f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54b1121f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54b1121f Branch: refs/heads/branch-1.6 Commit: 54b1121f351f056d6b67d2bb4efe0d553c0f7482 Parents: 3d569d9 Author: Patrick Wendell Authored: Sun Jun 19 14:06:21 2016 -0700 Committer: Patrick Wendell Committed: Sun Jun 19 14:06:21 2016 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- docker-integration-tests/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tags/pom.xml| 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 6ec2ca4..438e6ed 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.3-SNAPSHOT +1.6.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 2d778c5..85be37f 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.3-SNAPSHOT +1.6.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index a8d7863..15e60a3 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.3-SNAPSHOT +1.6.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/docker-integration-tests/pom.xml -- diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml index a06e59c..0bc749f 100644 --- a/docker-integration-tests/pom.xml +++ b/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 -1.6.3-SNAPSHOT +1.6.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index 8e9e02e..f771a36 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.3-SNAPSHOT +1.6.2 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 52c8a91..1ef7e7f 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.3-SNAPSHOT +1.6.2 ../../pom.xml 
http://git-wip-us.apache.org/repos/asf/spark/blob/54b1121f/external/flume-sink/pom.xml -- diff --gi
[spark] Git Push Summary
Repository: spark Updated Tags: refs/tags/v1.6.2-rc2 [created] 54b1121f3
[2/2] spark git commit: Preparing development version 1.6.3-SNAPSHOT
Preparing development version 1.6.3-SNAPSHOT Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/20834859 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/20834859 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/20834859 Branch: refs/heads/branch-1.6 Commit: 20834859517438124b94a90a27014cc932d1eeb6 Parents: 54b1121 Author: Patrick Wendell Authored: Sun Jun 19 14:06:28 2016 -0700 Committer: Patrick Wendell Committed: Sun Jun 19 14:06:28 2016 -0700 -- assembly/pom.xml| 2 +- bagel/pom.xml | 2 +- core/pom.xml| 2 +- docker-integration-tests/pom.xml| 2 +- examples/pom.xml| 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/kafka-assembly/pom.xml | 2 +- external/kafka/pom.xml | 2 +- external/mqtt-assembly/pom.xml | 2 +- external/mqtt/pom.xml | 2 +- external/twitter/pom.xml| 2 +- external/zeromq/pom.xml | 2 +- extras/java8-tests/pom.xml | 2 +- extras/kinesis-asl-assembly/pom.xml | 2 +- extras/kinesis-asl/pom.xml | 2 +- extras/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml| 2 +- mllib/pom.xml | 2 +- network/common/pom.xml | 2 +- network/shuffle/pom.xml | 2 +- network/yarn/pom.xml| 2 +- pom.xml | 2 +- repl/pom.xml| 2 +- sql/catalyst/pom.xml| 2 +- sql/core/pom.xml| 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml| 2 +- streaming/pom.xml | 2 +- tags/pom.xml| 2 +- tools/pom.xml | 2 +- unsafe/pom.xml | 2 +- yarn/pom.xml| 2 +- 35 files changed, 35 insertions(+), 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index 438e6ed..6ec2ca4 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.2 +1.6.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/bagel/pom.xml -- diff --git a/bagel/pom.xml b/bagel/pom.xml index 85be37f..2d778c5 100644 --- a/bagel/pom.xml +++ b/bagel/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.2 +1.6.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/core/pom.xml -- diff --git a/core/pom.xml b/core/pom.xml index 15e60a3..a8d7863 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.2 +1.6.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/docker-integration-tests/pom.xml -- diff --git a/docker-integration-tests/pom.xml b/docker-integration-tests/pom.xml index 0bc749f..a06e59c 100644 --- a/docker-integration-tests/pom.xml +++ b/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.10 -1.6.2 +1.6.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/examples/pom.xml -- diff --git a/examples/pom.xml b/examples/pom.xml index f771a36..8e9e02e 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.2 +1.6.3-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/external/flume-assembly/pom.xml -- diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 1ef7e7f..52c8a91 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.10 -1.6.2 +1.6.3-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/20834859/external/flume-sink/pom.xml -- diff --git 
a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index c1cb64b
spark git commit: [SPARK-16040][MLLIB][DOC] spark.mllib PIC document extra line of reference
Repository: spark Updated Branches: refs/heads/branch-2.0 2c1c337ba -> 80c6d4e3a [SPARK-16040][MLLIB][DOC] spark.mllib PIC document extra line of reference ## What changes were proposed in this pull request? In the 2.0 document, the line "A full example that produces the experiment described in the PIC paper can be found under examples/." is redundant: the page already says "Find full example code at "examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala" in the Spark repo.". We should remove the first line, to be consistent with the other documents. ## How was this patch tested? Manual test. Author: wm...@hotmail.com Closes #13755 from wangmiao1981/doc. (cherry picked from commit 5930d7a2e95b2fe4d470cf39546e5a12306553fe) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80c6d4e3 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80c6d4e3 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80c6d4e3 Branch: refs/heads/branch-2.0 Commit: 80c6d4e3a49fad4dac46738fe5458641f21b96a1 Parents: 2c1c337 Author: wm...@hotmail.com Authored: Sun Jun 19 20:19:40 2016 +0100 Committer: Sean Owen Committed: Sun Jun 19 20:19:48 2016 +0100 -- docs/mllib-clustering.md | 4 1 file changed, 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/80c6d4e3/docs/mllib-clustering.md -- diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index 6897ba4..073927c 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -170,10 +170,6 @@ which contains the computed clustering assignments. Refer to the [`PowerIterationClustering` Scala docs](api/scala/index.html#org.apache.spark.mllib.clustering.PowerIterationClustering) and [`PowerIterationClusteringModel` Scala docs](api/scala/index.html#org.apache.spark.mllib.clustering.PowerIterationClusteringModel) for details on the API. {% include_example scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala %} - -A full example that produces the experiment described in the PIC paper can be found under -[`examples/`](https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala). -
spark git commit: [SPARK-16040][MLLIB][DOC] spark.mllib PIC document extra line of reference
Repository: spark Updated Branches: refs/heads/master 1b3a9b966 -> 5930d7a2e [SPARK-16040][MLLIB][DOC] spark.mllib PIC document extra line of reference ## What changes were proposed in this pull request? In the 2.0 document, the line "A full example that produces the experiment described in the PIC paper can be found under examples/." is redundant: the page already says "Find full example code at "examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala" in the Spark repo.". We should remove the first line, to be consistent with the other documents. ## How was this patch tested? Manual test. Author: wm...@hotmail.com Closes #13755 from wangmiao1981/doc. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5930d7a2 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5930d7a2 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5930d7a2 Branch: refs/heads/master Commit: 5930d7a2e95b2fe4d470cf39546e5a12306553fe Parents: 1b3a9b9 Author: wm...@hotmail.com Authored: Sun Jun 19 20:19:40 2016 +0100 Committer: Sean Owen Committed: Sun Jun 19 20:19:40 2016 +0100 -- docs/mllib-clustering.md | 4 1 file changed, 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/5930d7a2/docs/mllib-clustering.md -- diff --git a/docs/mllib-clustering.md b/docs/mllib-clustering.md index 6897ba4..073927c 100644 --- a/docs/mllib-clustering.md +++ b/docs/mllib-clustering.md @@ -170,10 +170,6 @@ which contains the computed clustering assignments. Refer to the [`PowerIterationClustering` Scala docs](api/scala/index.html#org.apache.spark.mllib.clustering.PowerIterationClustering) and [`PowerIterationClusteringModel` Scala docs](api/scala/index.html#org.apache.spark.mllib.clustering.PowerIterationClusteringModel) for details on the API. {% include_example scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala %} - -A full example that produces the experiment described in the PIC paper can be found under -[`examples/`](https://github.com/apache/spark/blob/master/examples/src/main/scala/org/apache/spark/examples/mllib/PowerIterationClusteringExample.scala). -
spark git commit: [SPARK-15942][REPL] Unblock `:reset` command in REPL.
Repository: spark Updated Branches: refs/heads/branch-2.0 dc85bd0a0 -> 2c1c337ba [SPARK-15942][REPL] Unblock `:reset` command in REPL. ## What changes were proposed in this pull request? (Paste from JIRA issue.) As a follow-up for SPARK-15697, I propose the following semantics for the `:reset` command. On `:reset` we forget everything the user has done, but not the initialization of Spark. To avoid confusion, we show a message that `spark` and `sc` are not erased; in fact they are in the same state the user's previous operations left them in. While doing the above, I felt that this is not usually what reset means. But an accidental shutdown of a cluster can be very costly, so in that sense this is less surprising and still useful. ## How was this patch tested? Manually, by calling the `:reset` command, both altering the state of SparkContext and creating some local variables. Author: Prashant Sharma Author: Prashant Sharma Closes #13661 from ScrapCodes/repl-reset-command. (cherry picked from commit 1b3a9b966a7813e2406dfb020e83605af22f9ef3) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2c1c337b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2c1c337b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2c1c337b Branch: refs/heads/branch-2.0 Commit: 2c1c337ba5984b9e495b4d02bf865e56fd83ab03 Parents: dc85bd0 Author: Prashant Sharma Authored: Sun Jun 19 20:12:00 2016 +0100 Committer: Sean Owen Committed: Sun Jun 19 20:12:08 2016 +0100 -- .../scala/org/apache/spark/repl/SparkILoop.scala| 16 ++-- .../scala/org/apache/spark/repl/ReplSuite.scala | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/2c1c337b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala -- diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala index dcf3209..2707b08 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -36,7 +36,11 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) def initializeSpark() { intp.beQuietDuring { processLine(""" -@transient val spark = org.apache.spark.repl.Main.createSparkSession() +@transient val spark = if (org.apache.spark.repl.Main.sparkSession != null) { +org.apache.spark.repl.Main.sparkSession + } else { +org.apache.spark.repl.Main.createSparkSession() + } @transient val sc = { val _sc = spark.sparkContext _sc.uiWebUrl.foreach(webUrl => println(s"Spark context Web UI available at ${webUrl}")) @@ -50,6 +54,7 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) processLine("import spark.implicits._") processLine("import spark.sql") processLine("import org.apache.spark.sql.functions._") + replayCommandStack = Nil // remove above commands from session history. } } @@ -70,7 +75,8 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) echo("Type :help for more information.") } - private val blockedCommands = Set[String]("reset") + /** Add repl commands that needs to be blocked. e.g.
reset */ + private val blockedCommands = Set[String]() /** Standard commands */ lazy val sparkStandardCommands: List[SparkILoop.this.LoopCommand] = @@ -88,6 +94,12 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) initializeSpark() super.loadFiles(settings) } + + override def resetCommand(line: String): Unit = { +super.resetCommand(line) +initializeSpark() +echo("Note that after :reset, state of SparkSession and SparkContext is unchanged.") + } } object SparkILoop { http://git-wip-us.apache.org/repos/asf/spark/blob/2c1c337b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala -- diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 2444e93..c10db94 100644 --- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -49,7 +49,8 @@ class ReplSuite extends SparkFunSuite { val oldExecutorClasspath = System.getProperty(CONF_EXECUTOR_CLASSPATH) System.setProperty(CONF_EXECUTOR_CLASSPATH, classpath) - +Main.sparkContext = null +Main.spar
spark git commit: [SPARK-15942][REPL] Unblock `:reset` command in REPL.
Repository: spark Updated Branches: refs/heads/master 001a58960 -> 1b3a9b966 [SPARK-15942][REPL] Unblock `:reset` command in REPL. ## What changes were proposed in this pull request? (Paste from JIRA issue.) As a follow-up for SPARK-15697, I propose the following semantics for the `:reset` command. On `:reset` we forget everything the user has done, but not the initialization of Spark. To avoid confusion, we show a message that `spark` and `sc` are not erased; in fact they are in the same state the user's previous operations left them in. While doing the above, I felt that this is not usually what reset means. But an accidental shutdown of a cluster can be very costly, so in that sense this is less surprising and still useful. ## How was this patch tested? Manually, by calling the `:reset` command, both altering the state of SparkContext and creating some local variables. Author: Prashant Sharma Author: Prashant Sharma Closes #13661 from ScrapCodes/repl-reset-command. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1b3a9b96 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1b3a9b96 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1b3a9b96 Branch: refs/heads/master Commit: 1b3a9b966a7813e2406dfb020e83605af22f9ef3 Parents: 001a589 Author: Prashant Sharma Authored: Sun Jun 19 20:12:00 2016 +0100 Committer: Sean Owen Committed: Sun Jun 19 20:12:00 2016 +0100 -- .../scala/org/apache/spark/repl/SparkILoop.scala| 16 ++-- .../scala/org/apache/spark/repl/ReplSuite.scala | 3 ++- 2 files changed, 16 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/1b3a9b96/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala -- diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala index dcf3209..2707b08 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/SparkILoop.scala @@ -36,7 +36,11 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) def initializeSpark() { intp.beQuietDuring { processLine(""" -@transient val spark = org.apache.spark.repl.Main.createSparkSession() +@transient val spark = if (org.apache.spark.repl.Main.sparkSession != null) { +org.apache.spark.repl.Main.sparkSession + } else { +org.apache.spark.repl.Main.createSparkSession() + } @transient val sc = { val _sc = spark.sparkContext _sc.uiWebUrl.foreach(webUrl => println(s"Spark context Web UI available at ${webUrl}")) @@ -50,6 +54,7 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) processLine("import spark.implicits._") processLine("import spark.sql") processLine("import org.apache.spark.sql.functions._") + replayCommandStack = Nil // remove above commands from session history. } } @@ -70,7 +75,8 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) echo("Type :help for more information.") } - private val blockedCommands = Set[String]("reset") + /** Add repl commands that needs to be blocked. e.g.
reset */ + private val blockedCommands = Set[String]() /** Standard commands */ lazy val sparkStandardCommands: List[SparkILoop.this.LoopCommand] = @@ -88,6 +94,12 @@ class SparkILoop(in0: Option[BufferedReader], out: JPrintWriter) initializeSpark() super.loadFiles(settings) } + + override def resetCommand(line: String): Unit = { +super.resetCommand(line) +initializeSpark() +echo("Note that after :reset, state of SparkSession and SparkContext is unchanged.") + } } object SparkILoop { http://git-wip-us.apache.org/repos/asf/spark/blob/1b3a9b96/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala -- diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala index 2444e93..c10db94 100644 --- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -49,7 +49,8 @@ class ReplSuite extends SparkFunSuite { val oldExecutorClasspath = System.getProperty(CONF_EXECUTOR_CLASSPATH) System.setProperty(CONF_EXECUTOR_CLASSPATH, classpath) - +Main.sparkContext = null +Main.sparkSession = null // causes recreation of SparkContext for each test. Main.conf.set("spark.master",
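A sketch of the resulting spark-shell behavior (output abridged and illustrative; the final message is the one echoed by resetCommand above). User definitions are forgotten, while `spark` and `sc` survive:

    scala> val x = 42
    x: Int = 42

    scala> :reset
    ... (standard REPL reset output) ...
    Note that after :reset, state of SparkSession and SparkContext is unchanged.

    scala> x
    <console>:12: error: not found: value x

    scala> sc.parallelize(1 to 3).count()   // sc is still the same context
    res1: Long = 3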
spark git commit: Revert "[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time"
Repository: spark Updated Branches: refs/heads/branch-1.6 41efd2091 -> 3d569d9ea Revert "[SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time" This reverts commit 41efd2091781b31118c6d37be59e4f0f4ae2bf66. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d569d9e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d569d9e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d569d9e Branch: refs/heads/branch-1.6 Commit: 3d569d9ea9357d6161b0c75ce2e6f045c3447458 Parents: 41efd20 Author: Davies Liu Authored: Sun Jun 19 09:30:59 2016 -0700 Committer: Davies Liu Committed: Sun Jun 19 09:30:59 2016 -0700 -- .../spark/sql/catalyst/util/DateTimeUtils.scala | 51 ++-- .../org/apache/spark/sql/types/DateType.scala | 2 +- .../sql/catalyst/util/DateTimeTestUtils.scala | 40 --- .../sql/catalyst/util/DateTimeUtilsSuite.scala | 40 --- 4 files changed, 4 insertions(+), 129 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/3d569d9e/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 94692d8..2b93882 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -89,8 +89,8 @@ object DateTimeUtils { // reverse of millisToDays def daysToMillis(days: SQLDate): Long = { -val millisLocal = days.toLong * MILLIS_PER_DAY -millisLocal - getOffsetFromLocalMillis(millisLocal, threadLocalLocalTimeZone.get()) +val millisUtc = days.toLong * MILLIS_PER_DAY +millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc) } def dateToString(days: SQLDate): String = @@ -820,41 +820,6 @@ object DateTimeUtils { } /** - * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in given timezone. - */ - private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { -var guess = tz.getRawOffset -// the actual offset should be calculated based on milliseconds in UTC -val offset = tz.getOffset(millisLocal - guess) -if (offset != guess) { - guess = tz.getOffset(millisLocal - offset) - if (guess != offset) { -// fallback to do the reverse lookup using java.sql.Timestamp -// this should only happen near the start or end of DST -val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt -val year = getYear(days) -val month = getMonth(days) -val day = getDayOfMonth(days) - -var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt -if (millisOfDay < 0) { - millisOfDay += MILLIS_PER_DAY.toInt -} -val seconds = (millisOfDay / 1000L).toInt -val hh = seconds / 3600 -val mm = seconds / 60 % 60 -val ss = seconds % 60 -val nano = millisOfDay % 1000 * 100 - -// create a Timestamp to get the unix timestamp (in UTC) -val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, nano) -guess = (millisLocal - timestamp.getTime).toInt - } -} -guess - } - - /** * Returns a timestamp of given timezone from utc timestamp, with the same string * representation in their timezone. 
*/ @@ -870,17 +835,7 @@ object DateTimeUtils { */ def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { val tz = TimeZone.getTimeZone(timeZone) -val offset = getOffsetFromLocalMillis(time / 1000L, tz) +val offset = tz.getOffset(time / 1000L) time - offset * 1000L } - - /** - * Re-initialize the current thread's thread locals. Exposed for testing. - */ - private[util] def resetThreadLocals(): Unit = { -threadLocalGmtCalendar.remove() -threadLocalLocalTimeZone.remove() -threadLocalTimestampFormat.remove() -threadLocalDateFormat.remove() - } } http://git-wip-us.apache.org/repos/asf/spark/blob/3d569d9e/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala index 2c96623..1d73e40 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DateType.scala @@ -30,7 +30,7 @@ import org.apache
spark git commit: [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time
Repository: spark Updated Branches: refs/heads/branch-1.6 3f1d730e9 -> 41efd2091 [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time ## What changes were proposed in this pull request? Internally, we use an Int to represent a date (the days since 1970-01-01). When we convert that into a unix timestamp (milliseconds since the epoch in UTC), we get the offset of a timezone using local millis (the milliseconds since 1970-01-01 in that timezone), but TimeZone.getOffset() expects a unix timestamp, so the result could be off by one hour (in Daylight Saving Time (DST) or not). This PR changes the lookup to use a best-effort approximation of the POSIX timestamp. When DST changes, some local times are not defined (for example, 2016-03-13 02:00:00 PST) and others map to multiple valid results in UTC (for example, 2016-11-06 01:00:00); this best-effort approximation should be enough in practice. ## How was this patch tested? Added regression tests. Author: Davies Liu Closes #13652 from davies/fix_timezone. (cherry picked from commit 001a58960311b07fe80e2f01e473f4987948d06e) Signed-off-by: Davies Liu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/41efd209 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/41efd209 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/41efd209 Branch: refs/heads/branch-1.6 Commit: 41efd2091781b31118c6d37be59e4f0f4ae2bf66 Parents: 3f1d730 Author: Davies Liu Authored: Sun Jun 19 00:34:52 2016 -0700 Committer: Davies Liu Committed: Sun Jun 19 00:35:17 2016 -0700 -- .../spark/sql/catalyst/util/DateTimeUtils.scala | 51 ++-- .../org/apache/spark/sql/types/DateType.scala | 2 +- .../sql/catalyst/util/DateTimeTestUtils.scala | 40 +++ .../sql/catalyst/util/DateTimeUtilsSuite.scala | 40 +++ 4 files changed, 129 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/41efd209/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index 2b93882..94692d8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -89,8 +89,8 @@ object DateTimeUtils { // reverse of millisToDays def daysToMillis(days: SQLDate): Long = { -val millisUtc = days.toLong * MILLIS_PER_DAY -millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc) +val millisLocal = days.toLong * MILLIS_PER_DAY +millisLocal - getOffsetFromLocalMillis(millisLocal, threadLocalLocalTimeZone.get()) } def dateToString(days: SQLDate): String = @@ -820,6 +820,41 @@ object DateTimeUtils { } /** + * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in given timezone.
+ */ + private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { +var guess = tz.getRawOffset +// the actual offset should be calculated based on milliseconds in UTC +val offset = tz.getOffset(millisLocal - guess) +if (offset != guess) { + guess = tz.getOffset(millisLocal - offset) + if (guess != offset) { +// fallback to do the reverse lookup using java.sql.Timestamp +// this should only happen near the start or end of DST +val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt +val year = getYear(days) +val month = getMonth(days) +val day = getDayOfMonth(days) + +var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt +if (millisOfDay < 0) { + millisOfDay += MILLIS_PER_DAY.toInt +} +val seconds = (millisOfDay / 1000L).toInt +val hh = seconds / 3600 +val mm = seconds / 60 % 60 +val ss = seconds % 60 +val nano = millisOfDay % 1000 * 100 + +// create a Timestamp to get the unix timestamp (in UTC) +val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, nano) +guess = (millisLocal - timestamp.getTime).toInt + } +} +guess + } + + /** * Returns a timestamp of given timezone from utc timestamp, with the same string * representation in their timezone. */ @@ -835,7 +870,17 @@ object DateTimeUtils { */ def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { val tz = TimeZone.getTimeZone(timeZone) -val offset = tz.getOffset(time / 1000L) +val offset = getOffsetFromLocalMillis(time / 1000L, tz)
spark git commit: [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time
Repository: spark Updated Branches: refs/heads/branch-2.0 ee6eea644 -> dc85bd0a0 [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time ## What changes were proposed in this pull request? Internally, we use an Int to represent a date (the days since 1970-01-01). When we convert that into a unix timestamp (milliseconds since the epoch in UTC), we get the offset of a timezone using local millis (the milliseconds since 1970-01-01 in that timezone), but TimeZone.getOffset() expects a unix timestamp, so the result could be off by one hour (in Daylight Saving Time (DST) or not). This PR changes the lookup to use a best-effort approximation of the POSIX timestamp. When DST changes, some local times are not defined (for example, 2016-03-13 02:00:00 PST) and others map to multiple valid results in UTC (for example, 2016-11-06 01:00:00); this best-effort approximation should be enough in practice. ## How was this patch tested? Added regression tests. Author: Davies Liu Closes #13652 from davies/fix_timezone. (cherry picked from commit 001a58960311b07fe80e2f01e473f4987948d06e) Signed-off-by: Davies Liu Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dc85bd0a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dc85bd0a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dc85bd0a Branch: refs/heads/branch-2.0 Commit: dc85bd0a08d7a511c667df2cbf27d6c2085c19dc Parents: ee6eea6 Author: Davies Liu Authored: Sun Jun 19 00:34:52 2016 -0700 Committer: Davies Liu Committed: Sun Jun 19 00:35:02 2016 -0700 -- .../spark/sql/catalyst/util/DateTimeUtils.scala | 51 ++-- .../org/apache/spark/sql/types/DateType.scala | 2 +- .../sql/catalyst/util/DateTimeTestUtils.scala | 40 +++ .../sql/catalyst/util/DateTimeUtilsSuite.scala | 40 +++ 4 files changed, 129 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dc85bd0a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index e08328a..56bf9a7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -100,8 +100,8 @@ object DateTimeUtils { // reverse of millisToDays def daysToMillis(days: SQLDate): Long = { -val millisUtc = days.toLong * MILLIS_PER_DAY -millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc) +val millisLocal = days.toLong * MILLIS_PER_DAY +millisLocal - getOffsetFromLocalMillis(millisLocal, threadLocalLocalTimeZone.get()) } def dateToString(days: SQLDate): String = @@ -851,6 +851,41 @@ object DateTimeUtils { } /** + * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in given timezone.
+ */ + private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { +var guess = tz.getRawOffset +// the actual offset should be calculated based on milliseconds in UTC +val offset = tz.getOffset(millisLocal - guess) +if (offset != guess) { + guess = tz.getOffset(millisLocal - offset) + if (guess != offset) { +// fallback to do the reverse lookup using java.sql.Timestamp +// this should only happen near the start or end of DST +val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt +val year = getYear(days) +val month = getMonth(days) +val day = getDayOfMonth(days) + +var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt +if (millisOfDay < 0) { + millisOfDay += MILLIS_PER_DAY.toInt +} +val seconds = (millisOfDay / 1000L).toInt +val hh = seconds / 3600 +val mm = seconds / 60 % 60 +val ss = seconds % 60 +val nano = millisOfDay % 1000 * 100 + +// create a Timestamp to get the unix timestamp (in UTC) +val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, nano) +guess = (millisLocal - timestamp.getTime).toInt + } +} +guess + } + + /** * Returns a timestamp of given timezone from utc timestamp, with the same string * representation in their timezone. */ @@ -866,7 +901,17 @@ object DateTimeUtils { */ def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { val tz = TimeZone.getTimeZone(timeZone) -val offset = tz.getOffset(time / 1000L) +val offset = getOffsetFromLocalMillis(time / 1000L, tz)
spark git commit: [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time
Repository: spark Updated Branches: refs/heads/master ce3b98bae -> 001a58960 [SPARK-15613] [SQL] Fix incorrect days to millis conversion due to Daylight Saving Time ## What changes were proposed in this pull request? Internally, we use an Int to represent a date (the days since 1970-01-01). When we convert that into a unix timestamp (milliseconds since the epoch in UTC), we get the offset of a timezone using local millis (the milliseconds since 1970-01-01 in that timezone), but TimeZone.getOffset() expects a unix timestamp, so the result could be off by one hour (in Daylight Saving Time (DST) or not). This PR changes the lookup to use a best-effort approximation of the POSIX timestamp. When DST changes, some local times are not defined (for example, 2016-03-13 02:00:00 PST) and others map to multiple valid results in UTC (for example, 2016-11-06 01:00:00); this best-effort approximation should be enough in practice. ## How was this patch tested? Added regression tests. Author: Davies Liu Closes #13652 from davies/fix_timezone. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/001a5896 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/001a5896 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/001a5896 Branch: refs/heads/master Commit: 001a58960311b07fe80e2f01e473f4987948d06e Parents: ce3b98b Author: Davies Liu Authored: Sun Jun 19 00:34:52 2016 -0700 Committer: Davies Liu Committed: Sun Jun 19 00:34:52 2016 -0700 -- .../spark/sql/catalyst/util/DateTimeUtils.scala | 51 ++-- .../org/apache/spark/sql/types/DateType.scala | 2 +- .../sql/catalyst/util/DateTimeTestUtils.scala | 40 +++ .../sql/catalyst/util/DateTimeUtilsSuite.scala | 40 +++ 4 files changed, 129 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/001a5896/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala index e08328a..56bf9a7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/DateTimeUtils.scala @@ -100,8 +100,8 @@ object DateTimeUtils { // reverse of millisToDays def daysToMillis(days: SQLDate): Long = { -val millisUtc = days.toLong * MILLIS_PER_DAY -millisUtc - threadLocalLocalTimeZone.get().getOffset(millisUtc) +val millisLocal = days.toLong * MILLIS_PER_DAY +millisLocal - getOffsetFromLocalMillis(millisLocal, threadLocalLocalTimeZone.get()) } def dateToString(days: SQLDate): String = @@ -851,6 +851,41 @@ object DateTimeUtils { } /** + * Lookup the offset for given millis seconds since 1970-01-01 00:00:00 in given timezone.
+ */ + private def getOffsetFromLocalMillis(millisLocal: Long, tz: TimeZone): Long = { +var guess = tz.getRawOffset +// the actual offset should be calculated based on milliseconds in UTC +val offset = tz.getOffset(millisLocal - guess) +if (offset != guess) { + guess = tz.getOffset(millisLocal - offset) + if (guess != offset) { +// fallback to do the reverse lookup using java.sql.Timestamp +// this should only happen near the start or end of DST +val days = Math.floor(millisLocal.toDouble / MILLIS_PER_DAY).toInt +val year = getYear(days) +val month = getMonth(days) +val day = getDayOfMonth(days) + +var millisOfDay = (millisLocal % MILLIS_PER_DAY).toInt +if (millisOfDay < 0) { + millisOfDay += MILLIS_PER_DAY.toInt +} +val seconds = (millisOfDay / 1000L).toInt +val hh = seconds / 3600 +val mm = seconds / 60 % 60 +val ss = seconds % 60 +val nano = millisOfDay % 1000 * 100 + +// create a Timestamp to get the unix timestamp (in UTC) +val timestamp = new Timestamp(year - 1900, month - 1, day, hh, mm, ss, nano) +guess = (millisLocal - timestamp.getTime).toInt + } +} +guess + } + + /** * Returns a timestamp of given timezone from utc timestamp, with the same string * representation in their timezone. */ @@ -866,7 +901,17 @@ object DateTimeUtils { */ def toUTCTime(time: SQLTimestamp, timeZone: String): SQLTimestamp = { val tz = TimeZone.getTimeZone(timeZone) -val offset = tz.getOffset(time / 1000L) +val offset = getOffsetFromLocalMillis(time / 1000L, tz) time - offset * 1000L } + + /** + * Re-initialize the current thread's thread locals. Exposed fo
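To see why the lookup needs the reverse iteration, here is a self-contained sketch (not part of the patch or its test suite) that mirrors the core loop of getOffsetFromLocalMillis, minus the java.sql.Timestamp fallback, and compares it with the naive lookup near a fall-back transition:

    import java.util.TimeZone

    object OffsetLookupSketch {
      // Best-effort offset for *local* millis, mirroring the patch's core loop.
      def bestEffortOffset(millisLocal: Long, tz: TimeZone): Int = {
        var guess = tz.getRawOffset
        // The real offset must be computed from milliseconds in UTC.
        val offset = tz.getOffset(millisLocal - guess)
        if (offset != guess) guess = tz.getOffset(millisLocal - offset)
        guess
      }

      def main(args: Array[String]): Unit = {
        val tz = TimeZone.getTimeZone("America/Los_Angeles")
        // 2016-11-06 01:30 local wall-clock time expressed as millis since the
        // local epoch; clocks fall back from PDT to PST at 02:00 that morning.
        val millisLocal = 1478395800000L
        // Naive pre-patch style: treats local millis as a UTC instant.
        val naive = tz.getOffset(millisLocal) / 3600000.0
        val fixed = bestEffortOffset(millisLocal, tz) / 3600000.0
        // Around the transition the two disagree by exactly one hour
        // (-7.0 vs -8.0 here).
        println(s"naive = $naive h, best-effort = $fixed h")
      }
    }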