spark git commit: [SPARK-20718][SQL] FileSourceScanExec with different filter orders should be the same after canonicalization

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 2cac317a8 -> a8d981dc5


[SPARK-20718][SQL] FileSourceScanExec with different filter orders should be 
the same after canonicalization

## What changes were proposed in this pull request?

Since `constraints` in `QueryPlan` is a set, the order of filters can differ. 
Usually this is ok because of canonicalization. However, in 
`FileSourceScanExec`, its data filters and partition filters are sequences, and 
their orders are not canonicalized. So `def sameResult` returns different 
results for different orders of data/partition filters. This leads to, e.g.,
different decisions for `ReuseExchange`, and thus results in unstable
performance.

## How was this patch tested?

Added a new test for `FileSourceScanExec.sameResult`.
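
For readers who want to see the symptom, here is a minimal, hypothetical sketch
(a local SparkSession and a temporary parquet path are assumed; it is not the
actual `SameResultSuite` test) of the kind of check this patch enables: two scans
over the same files whose pushed-down filters may differ only in order should
compare equal via `sameResult`.

```scala
import org.apache.spark.sql.{DataFrame, SparkSession}
import org.apache.spark.sql.execution.FileSourceScanExec

val spark = SparkSession.builder().master("local[1]").getOrCreate()
import spark.implicits._

// Hypothetical scratch location for the demo data.
val path = "/tmp/same_result_demo"
Seq((1, 2), (3, 4)).toDF("a", "b").write.mode("overwrite").parquet(path)
val df = spark.read.parquet(path)

// Pull the FileSourceScanExec out of the physical plan.
def fileScan(q: DataFrame): FileSourceScanExec =
  q.queryExecution.sparkPlan.collectFirst { case s: FileSourceScanExec => s }.get

// The two queries push down the same predicates, possibly in different orders.
val scan1 = fileScan(df.where("a > 1").where("b > 1"))
val scan2 = fileScan(df.where("b > 1").where("a > 1"))

// Expected to print true once the filter sequences are canonicalized.
println(scan1.sameResult(scan2))
```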

Author: wangzhenhua 

Closes #17959 from wzhfy/canonicalizeFileSourceScanExec.

(cherry picked from commit c8da5356000c8e4ff9141e4a2892ebe0b9641d63)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8d981dc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8d981dc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8d981dc

Branch: refs/heads/branch-2.2
Commit: a8d981dc5d11d65a4bd3a68aa57455b34a2649f9
Parents: 2cac317
Author: wangzhenhua 
Authored: Fri May 12 13:42:48 2017 +0800
Committer: Wenchen Fan 
Committed: Fri May 12 13:43:04 2017 +0800

--
 .../sql/execution/DataSourceScanExec.scala  | 16 +--
 .../spark/sql/execution/SameResultSuite.scala   | 49 
 2 files changed, 62 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/a8d981dc/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index 866fa98..251098c 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils
 
-trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
+trait DataSourceScanExec extends LeafExecNode with CodegenSupport with 
PredicateHelper {
   val relation: BaseRelation
   val metastoreTableIdentifier: Option[TableIdentifier]
 
@@ -519,8 +519,18 @@ case class FileSourceScanExec(
   relation,
   output.map(QueryPlan.normalizeExprId(_, output)),
   requiredSchema,
-  partitionFilters.map(QueryPlan.normalizeExprId(_, output)),
-  dataFilters.map(QueryPlan.normalizeExprId(_, output)),
+  canonicalizeFilters(partitionFilters, output),
+  canonicalizeFilters(dataFilters, output),
   None)
   }
+
+  private def canonicalizeFilters(filters: Seq[Expression], output: 
Seq[Attribute])
+: Seq[Expression] = {
+if (filters.nonEmpty) {
+  val normalizedFilters = QueryPlan.normalizeExprId(filters.reduce(And), 
output)
+  splitConjunctivePredicates(normalizedFilters)
+} else {
+  Nil
+}
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/a8d981dc/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
new file mode 100644
index 000..25e4ca0
--- /dev/null
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+

spark git commit: [SPARK-20718][SQL] FileSourceScanExec with different filter orders should be the same after canonicalization

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 2b36eb696 -> c8da53560


[SPARK-20718][SQL] FileSourceScanExec with different filter orders should be 
the same after canonicalization

## What changes were proposed in this pull request?

Since `constraints` in `QueryPlan` is a set, the order of filters can differ. 
Usually this is ok because of canonicalization. However, in 
`FileSourceScanExec`, its data filters and partition filters are sequences, and 
their orders are not canonicalized. So `def sameResult` returns different 
results for different orders of data/partition filters. This leads to, e.g.,
different decisions for `ReuseExchange`, and thus results in unstable
performance.

## How was this patch tested?

Added a new test for `FileSourceScanExec.sameResult`.

Author: wangzhenhua 

Closes #17959 from wzhfy/canonicalizeFileSourceScanExec.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8da5356
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8da5356
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8da5356

Branch: refs/heads/master
Commit: c8da5356000c8e4ff9141e4a2892ebe0b9641d63
Parents: 2b36eb6
Author: wangzhenhua 
Authored: Fri May 12 13:42:48 2017 +0800
Committer: Wenchen Fan 
Committed: Fri May 12 13:42:48 2017 +0800

--
 .../sql/execution/DataSourceScanExec.scala  | 16 +--
 .../spark/sql/execution/SameResultSuite.scala   | 49 
 2 files changed, 62 insertions(+), 3 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/c8da5356/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
--
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index 866fa98..251098c 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils
 
-trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
+trait DataSourceScanExec extends LeafExecNode with CodegenSupport with 
PredicateHelper {
   val relation: BaseRelation
   val metastoreTableIdentifier: Option[TableIdentifier]
 
@@ -519,8 +519,18 @@ case class FileSourceScanExec(
   relation,
   output.map(QueryPlan.normalizeExprId(_, output)),
   requiredSchema,
-  partitionFilters.map(QueryPlan.normalizeExprId(_, output)),
-  dataFilters.map(QueryPlan.normalizeExprId(_, output)),
+  canonicalizeFilters(partitionFilters, output),
+  canonicalizeFilters(dataFilters, output),
   None)
   }
+
+  private def canonicalizeFilters(filters: Seq[Expression], output: 
Seq[Attribute])
+: Seq[Expression] = {
+if (filters.nonEmpty) {
+  val normalizedFilters = QueryPlan.normalizeExprId(filters.reduce(And), 
output)
+  splitConjunctivePredicates(normalizedFilters)
+} else {
+  Nil
+}
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/c8da5356/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
new file mode 100644
index 000..25e4ca0
--- /dev/null
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.test.SharedSQLContext
+
+/**
+ * Tests for the 

spark git commit: [SPARK-20665][SQL] "Bround" and "Round" functions return NULL

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.0 d86dae8fe -> b2d0ed287


[SPARK-20665][SQL] "Bround" and "Round" functions return NULL

   spark-sql>select bround(12.3, 2);
   spark-sql>NULL
For this case, the expected result is 12.3, but it is null.
So, when the second parameter is bigger than the decimal value's scale
(`decimal.scale`), the result is not what we expected.
The "round" function has the same problem. This PR solves the problem for both
of them.

unit test cases in MathExpressionsSuite and MathFunctionsSuite
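
A minimal way to check the fixed behaviour (a local SparkSession is assumed; this
sketch is illustrative and not one of the added test cases):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").getOrCreate()

// Before the fix these returned NULL, because changePrecision was called with the
// requested scale instead of the result type's scale; afterwards both return 12.3.
spark.sql("SELECT round(12.3, 2), bround(12.3, 2)").show()
```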

Author: liuxian 

Closes #17906 from 10110346/wip_lx_0509.

(cherry picked from commit 2b36eb696f6c738e1328582630755aaac4293460)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b2d0ed28
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b2d0ed28
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b2d0ed28

Branch: refs/heads/branch-2.0
Commit: b2d0ed2875fcc90a3ac70e857eb42bce9055e6d6
Parents: d86dae8
Author: liuxian 
Authored: Fri May 12 11:38:50 2017 +0800
Committer: Wenchen Fan 
Committed: Fri May 12 11:43:21 2017 +0800

--
 .../sql/catalyst/expressions/mathExpressions.scala | 12 ++--
 .../sql/catalyst/expressions/MathFunctionsSuite.scala  |  7 +++
 .../org/apache/spark/sql/MathExpressionsSuite.scala| 13 +
 3 files changed, 22 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b2d0ed28/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index 591e1e5..c7dfeb7 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -845,10 +845,10 @@ abstract class RoundBase(child: Expression, scale: 
Expression,
 
   // not overriding since _scale is a constant int at runtime
   def nullSafeEval(input1: Any): Any = {
-child.dataType match {
-  case _: DecimalType =>
+dataType match {
+  case DecimalType.Fixed(_, s) =>
 val decimal = input1.asInstanceOf[Decimal]
-if (decimal.changePrecision(decimal.precision, _scale, mode)) decimal 
else null
+if (decimal.changePrecision(decimal.precision, s, mode)) decimal else 
null
   case ByteType =>
 BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte
   case ShortType =>
@@ -877,10 +877,10 @@ abstract class RoundBase(child: Expression, scale: 
Expression,
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
 val ce = child.genCode(ctx)
 
-val evaluationCode = child.dataType match {
-  case _: DecimalType =>
+val evaluationCode = dataType match {
+  case DecimalType.Fixed(_, s) =>
 s"""
-if (${ce.value}.changePrecision(${ce.value}.precision(), ${_scale},
+if (${ce.value}.changePrecision(${ce.value}.precision(), ${s},
 java.math.BigDecimal.${modeStr})) {
   ${ev.value} = ${ce.value};
 } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/b2d0ed28/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
index f88c9e8..a08db2f 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
@@ -546,15 +546,14 @@ class MathFunctionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1), 
BigDecimal(3.14),
   BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159),
   BigDecimal(3.141593), BigDecimal(3.1415927))
-// round_scale > current_scale would result in precision increase
-// and not allowed by o.a.s.s.types.Decimal.changePrecision, therefore null
+
 (0 to 7).foreach { i =>
   checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow)
   checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow)
 }
 (8 to 10).foreach { scale =>
-  checkEvaluation(Round(bdPi, scale), null, EmptyRow)
-  

spark git commit: [SPARK-20665][SQL] "Bround" and "Round" functions return NULL

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 92a71a667 -> 6e89d5740


[SPARK-20665][SQL] "Bround" and "Round" functions return NULL

   spark-sql>select bround(12.3, 2);
   spark-sql>NULL
For this case, the expected result is 12.3, but it is null.
So, when the second parameter is bigger than the decimal value's scale
(`decimal.scale`), the result is not what we expected.
The "round" function has the same problem. This PR solves the problem for both
of them.

unit test cases in MathExpressionsSuite and MathFunctionsSuite

Author: liuxian 

Closes #17906 from 10110346/wip_lx_0509.

(cherry picked from commit 2b36eb696f6c738e1328582630755aaac4293460)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e89d574
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e89d574
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e89d574

Branch: refs/heads/branch-2.1
Commit: 6e89d574058bc2b96b14a691a07580be67f63707
Parents: 92a71a6
Author: liuxian 
Authored: Fri May 12 11:38:50 2017 +0800
Committer: Wenchen Fan 
Committed: Fri May 12 11:41:40 2017 +0800

--
 .../sql/catalyst/expressions/mathExpressions.scala | 12 ++--
 .../catalyst/expressions/MathExpressionsSuite.scala|  7 +++
 .../org/apache/spark/sql/MathFunctionsSuite.scala  | 13 +
 3 files changed, 22 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/6e89d574/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index 65273a7..54b8457 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1021,10 +1021,10 @@ abstract class RoundBase(child: Expression, scale: 
Expression,
 
   // not overriding since _scale is a constant int at runtime
   def nullSafeEval(input1: Any): Any = {
-child.dataType match {
-  case _: DecimalType =>
+dataType match {
+  case DecimalType.Fixed(_, s) =>
 val decimal = input1.asInstanceOf[Decimal]
-if (decimal.changePrecision(decimal.precision, _scale, mode)) decimal 
else null
+if (decimal.changePrecision(decimal.precision, s, mode)) decimal else 
null
   case ByteType =>
 BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte
   case ShortType =>
@@ -1053,10 +1053,10 @@ abstract class RoundBase(child: Expression, scale: 
Expression,
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
 val ce = child.genCode(ctx)
 
-val evaluationCode = child.dataType match {
-  case _: DecimalType =>
+val evaluationCode = dataType match {
+  case DecimalType.Fixed(_, s) =>
 s"""
-if (${ce.value}.changePrecision(${ce.value}.precision(), ${_scale},
+if (${ce.value}.changePrecision(${ce.value}.precision(), ${s},
 java.math.BigDecimal.${modeStr})) {
   ${ev.value} = ${ce.value};
 } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/6e89d574/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
index 6b5bfac..1555dd1 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
@@ -546,15 +546,14 @@ class MathExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1), 
BigDecimal(3.14),
   BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159),
   BigDecimal(3.141593), BigDecimal(3.1415927))
-// round_scale > current_scale would result in precision increase
-// and not allowed by o.a.s.s.types.Decimal.changePrecision, therefore null
+
 (0 to 7).foreach { i =>
   checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow)
   checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow)
 }
 (8 to 10).foreach { scale =>
-  checkEvaluation(Round(bdPi, scale), null, 

spark git commit: [SPARK-20665][SQL] "Bround" and "Round" functions return NULL

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 3d1908fd5 -> 2cac317a8


[SPARK-20665][SQL] "Bround" and "Round" functions return NULL

## What changes were proposed in this pull request?
   spark-sql>select bround(12.3, 2);
   spark-sql>NULL
For this case, the expected result is 12.3, but it is null.
So, when the second parameter is bigger than the decimal value's scale
(`decimal.scale`), the result is not what we expected.
The "round" function has the same problem. This PR solves the problem for both
of them.

## How was this patch tested?
unit test cases in MathExpressionsSuite and MathFunctionsSuite

Author: liuxian 

Closes #17906 from 10110346/wip_lx_0509.

(cherry picked from commit 2b36eb696f6c738e1328582630755aaac4293460)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2cac317a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2cac317a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2cac317a

Branch: refs/heads/branch-2.2
Commit: 2cac317a84a234f034b0c75dcb5e4c27860a4cc0
Parents: 3d1908f
Author: liuxian 
Authored: Fri May 12 11:38:50 2017 +0800
Committer: Wenchen Fan 
Committed: Fri May 12 11:39:02 2017 +0800

--
 .../sql/catalyst/expressions/mathExpressions.scala | 12 ++--
 .../catalyst/expressions/MathExpressionsSuite.scala|  7 +++
 .../org/apache/spark/sql/MathFunctionsSuite.scala  | 13 +
 3 files changed, 22 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2cac317a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index c4d47ab..de1a46d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1023,10 +1023,10 @@ abstract class RoundBase(child: Expression, scale: 
Expression,
 
   // not overriding since _scale is a constant int at runtime
   def nullSafeEval(input1: Any): Any = {
-child.dataType match {
-  case _: DecimalType =>
+dataType match {
+  case DecimalType.Fixed(_, s) =>
 val decimal = input1.asInstanceOf[Decimal]
-decimal.toPrecision(decimal.precision, _scale, mode).orNull
+decimal.toPrecision(decimal.precision, s, mode).orNull
   case ByteType =>
 BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte
   case ShortType =>
@@ -1055,10 +1055,10 @@ abstract class RoundBase(child: Expression, scale: 
Expression,
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
 val ce = child.genCode(ctx)
 
-val evaluationCode = child.dataType match {
-  case _: DecimalType =>
+val evaluationCode = dataType match {
+  case DecimalType.Fixed(_, s) =>
 s"""
-if (${ce.value}.changePrecision(${ce.value}.precision(), ${_scale},
+if (${ce.value}.changePrecision(${ce.value}.precision(), ${s},
 java.math.BigDecimal.${modeStr})) {
   ${ev.value} = ${ce.value};
 } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/2cac317a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
index 6b5bfac..1555dd1 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
@@ -546,15 +546,14 @@ class MathExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1), 
BigDecimal(3.14),
   BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159),
   BigDecimal(3.141593), BigDecimal(3.1415927))
-// round_scale > current_scale would result in precision increase
-// and not allowed by o.a.s.s.types.Decimal.changePrecision, therefore null
+
 (0 to 7).foreach { i =>
   checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow)
   checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow)
 }
 (8 to 10).foreach { scale =>
-  

spark git commit: [SPARK-20665][SQL] "Bround" and "Round" functions return NULL

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 609ba5f2b -> 2b36eb696


[SPARK-20665][SQL] "Bround" and "Round" functions return NULL

## What changes were proposed in this pull request?
   spark-sql>select bround(12.3, 2);
   spark-sql>NULL
For this case, the expected result is 12.3, but it is null.
So, when the second parameter is bigger than the decimal value's scale
(`decimal.scale`), the result is not what we expected.
The "round" function has the same problem. This PR solves the problem for both
of them.

## How was this patch tested?
unit test cases in MathExpressionsSuite and MathFunctionsSuite

Author: liuxian 

Closes #17906 from 10110346/wip_lx_0509.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b36eb69
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b36eb69
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b36eb69

Branch: refs/heads/master
Commit: 2b36eb696f6c738e1328582630755aaac4293460
Parents: 609ba5f
Author: liuxian 
Authored: Fri May 12 11:38:50 2017 +0800
Committer: Wenchen Fan 
Committed: Fri May 12 11:38:50 2017 +0800

--
 .../sql/catalyst/expressions/mathExpressions.scala | 12 ++--
 .../catalyst/expressions/MathExpressionsSuite.scala|  7 +++
 .../org/apache/spark/sql/MathFunctionsSuite.scala  | 13 +
 3 files changed, 22 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/2b36eb69/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index c4d47ab..de1a46d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1023,10 +1023,10 @@ abstract class RoundBase(child: Expression, scale: 
Expression,
 
   // not overriding since _scale is a constant int at runtime
   def nullSafeEval(input1: Any): Any = {
-child.dataType match {
-  case _: DecimalType =>
+dataType match {
+  case DecimalType.Fixed(_, s) =>
 val decimal = input1.asInstanceOf[Decimal]
-decimal.toPrecision(decimal.precision, _scale, mode).orNull
+decimal.toPrecision(decimal.precision, s, mode).orNull
   case ByteType =>
 BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte
   case ShortType =>
@@ -1055,10 +1055,10 @@ abstract class RoundBase(child: Expression, scale: 
Expression,
   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
 val ce = child.genCode(ctx)
 
-val evaluationCode = child.dataType match {
-  case _: DecimalType =>
+val evaluationCode = dataType match {
+  case DecimalType.Fixed(_, s) =>
 s"""
-if (${ce.value}.changePrecision(${ce.value}.precision(), ${_scale},
+if (${ce.value}.changePrecision(${ce.value}.precision(), ${s},
 java.math.BigDecimal.${modeStr})) {
   ${ev.value} = ${ce.value};
 } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/2b36eb69/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
--
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
index 6b5bfac..1555dd1 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
@@ -546,15 +546,14 @@ class MathExpressionsSuite extends SparkFunSuite with 
ExpressionEvalHelper {
 val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1), 
BigDecimal(3.14),
   BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159),
   BigDecimal(3.141593), BigDecimal(3.1415927))
-// round_scale > current_scale would result in precision increase
-// and not allowed by o.a.s.s.types.Decimal.changePrecision, therefore null
+
 (0 to 7).foreach { i =>
   checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow)
   checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow)
 }
 (8 to 10).foreach { scale =>
-  checkEvaluation(Round(bdPi, scale), null, EmptyRow)
-  checkEvaluation(BRound(bdPi, scale), null, EmptyRow)
+  

spark git commit: [SPARK-20399][SQL] Add a config to fallback string literal parsing consistent with old sql parser behavior

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 04901dd03 -> 609ba5f2b


[SPARK-20399][SQL] Add a config to fallback string literal parsing consistent 
with old sql parser behavior

## What changes were proposed in this pull request?

The new SQL parser was introduced in Spark 2.0. All string literals are unescaped
by the parser. This seems to bring an issue regarding regex pattern strings.

The following codes can reproduce it:

val data = Seq("\u0020\u0021\u0023", "abc")
val df = data.toDF()

// 1st usage: works in 1.6
// Let parser parse pattern string
val rlike1 = df.filter("value rlike '^\\x20[\\x20-\\x23]+$'")
// 2nd usage: works in 1.6, 2.x
// Call Column.rlike so the pattern string is a literal which doesn't go 
through parser
val rlike2 = df.filter($"value".rlike("^\\x20[\\x20-\\x23]+$"))

// In 2.x, we need to add backslashes to make the regex pattern parse correctly
val rlike3 = df.filter("value rlike '^\\\\x20[\\\\x20-\\\\x23]+$'")

Following the discussion in #17736, this patch adds a config to fall back to 1.6
string literal parsing and mitigate the migration issue.
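
A hedged sketch of how the new fallback would be used (the config key comes from
this patch; the session setup below is assumed):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").getOrCreate()
import spark.implicits._

// Fall back to the Spark 1.6 string-literal handling in the SQL parser.
spark.conf.set("spark.sql.parser.escapedStringLiterals", "true")

val df = Seq("\u0020\u0021\u0023", "abc").toDF("value")
// With the fallback enabled, the 1.6-style pattern parses as intended again.
df.filter("value rlike '^\\x20[\\x20-\\x23]+$'").show()
```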

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: Liang-Chi Hsieh 

Closes #17887 from viirya/add-config-fallback-string-parsing.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/609ba5f2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/609ba5f2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/609ba5f2

Branch: refs/heads/master
Commit: 609ba5f2b9fd89b1b9971d08f7cc680d202dbc7c
Parents: 04901dd
Author: Liang-Chi Hsieh 
Authored: Fri May 12 11:15:10 2017 +0800
Committer: Wenchen Fan 
Committed: Fri May 12 11:15:10 2017 +0800

--
 .../sql/catalyst/catalog/SessionCatalog.scala   |   2 +-
 .../expressions/regexpExpressions.scala |  33 -
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  11 +-
 .../spark/sql/catalyst/parser/ParseDriver.scala |   8 +-
 .../spark/sql/catalyst/parser/ParserUtils.scala |   6 +
 .../org/apache/spark/sql/internal/SQLConf.scala |  10 ++
 .../catalyst/parser/ExpressionParserSuite.scala | 128 +--
 .../spark/sql/execution/SparkSqlParser.scala|   2 +-
 .../org/apache/spark/sql/DatasetSuite.scala |  13 ++
 9 files changed, 171 insertions(+), 42 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/609ba5f2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 18e5146..f6653d3 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -73,7 +73,7 @@ class SessionCatalog(
   functionRegistry,
   conf,
   new Configuration(),
-  CatalystSqlParser,
+  new CatalystSqlParser(conf),
   DummyFunctionResourceLoader)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/609ba5f2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 3fa8458..aa5a1b5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -86,6 +86,13 @@ abstract class StringRegexExpression extends BinaryExpression
 escape character, the following character is matched literally. It is 
invalid to escape
 any other character.
 
+Since Spark 2.0, string literals are unescaped in our SQL parser. For 
example, in order
+to match "\abc", the pattern should be "\\abc".
+
+When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, 
it fallbacks
+to Spark 1.6 behavior regarding string literal parsing. For example, 
if the config is
+enabled, the pattern to match "\abc" should be "\abc".
+
 Examples:
   > SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%'
   true
@@ -144,7 +151,31 @@ case class Like(left: Expression, right: Expression) 
extends StringRegexExpressi
 }
 
 @ExpressionDescription(
-  

spark git commit: [SPARK-20399][SQL] Add a config to fallback string literal parsing consistent with old sql parser behavior

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 5844151bc -> 3d1908fd5


[SPARK-20399][SQL] Add a config to fallback string literal parsing consistent 
with old sql parser behavior

## What changes were proposed in this pull request?

The new SQL parser was introduced in Spark 2.0. All string literals are unescaped
by the parser. This seems to bring an issue regarding regex pattern strings.

The following codes can reproduce it:

val data = Seq("\u0020\u0021\u0023", "abc")
val df = data.toDF()

// 1st usage: works in 1.6
// Let parser parse pattern string
val rlike1 = df.filter("value rlike '^\\x20[\\x20-\\x23]+$'")
// 2nd usage: works in 1.6, 2.x
// Call Column.rlike so the pattern string is a literal which doesn't go 
through parser
val rlike2 = df.filter($"value".rlike("^\\x20[\\x20-\\x23]+$"))

// In 2.x, we need to add backslashes to make the regex pattern parse correctly
val rlike3 = df.filter("value rlike '^\\\\x20[\\\\x20-\\\\x23]+$'")

Following the discussion in #17736, this patch adds a config to fall back to 1.6
string literal parsing and mitigate the migration issue.

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull 
request.

Author: Liang-Chi Hsieh 

Closes #17887 from viirya/add-config-fallback-string-parsing.

(cherry picked from commit 609ba5f2b9fd89b1b9971d08f7cc680d202dbc7c)
Signed-off-by: Wenchen Fan 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d1908fd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d1908fd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d1908fd

Branch: refs/heads/branch-2.2
Commit: 3d1908fd58fd9b1970cbffebdb731bfe4c776ad9
Parents: 5844151
Author: Liang-Chi Hsieh 
Authored: Fri May 12 11:15:10 2017 +0800
Committer: Wenchen Fan 
Committed: Fri May 12 11:15:26 2017 +0800

--
 .../sql/catalyst/catalog/SessionCatalog.scala   |   2 +-
 .../expressions/regexpExpressions.scala |  33 -
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  11 +-
 .../spark/sql/catalyst/parser/ParseDriver.scala |   8 +-
 .../spark/sql/catalyst/parser/ParserUtils.scala |   6 +
 .../org/apache/spark/sql/internal/SQLConf.scala |  10 ++
 .../catalyst/parser/ExpressionParserSuite.scala | 128 +--
 .../spark/sql/execution/SparkSqlParser.scala|   2 +-
 .../org/apache/spark/sql/DatasetSuite.scala |  13 ++
 9 files changed, 171 insertions(+), 42 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3d1908fd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 18e5146..f6653d3 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -73,7 +73,7 @@ class SessionCatalog(
   functionRegistry,
   conf,
   new Configuration(),
-  CatalystSqlParser,
+  new CatalystSqlParser(conf),
   DummyFunctionResourceLoader)
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/3d1908fd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 3fa8458..aa5a1b5 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -86,6 +86,13 @@ abstract class StringRegexExpression extends BinaryExpression
 escape character, the following character is matched literally. It is 
invalid to escape
 any other character.
 
+Since Spark 2.0, string literals are unescaped in our SQL parser. For 
example, in order
+to match "\abc", the pattern should be "\\abc".
+
+When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, 
it fallbacks
+to Spark 1.6 behavior regarding string literal parsing. For example, 
if the config is
+enabled, the pattern to match "\abc" should be "\abc".
+
 Examples:
   > SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%'
   true
@@ 

spark git commit: [SPARK-20431][SQL] Specify a schema by using a DDL-formatted string

2017-05-11 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 7144b5180 -> 04901dd03


[SPARK-20431][SQL] Specify a schema by using a DDL-formatted string

## What changes were proposed in this pull request?
This PR supports a DDL-formatted string in `DataFrameReader.schema`.
This fix lets users easily define a schema without importing
`o.a.spark.sql.types._`.

## How was this patch tested?
Added tests in `DataFrameReaderWriterSuite`.
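
For illustration, a minimal sketch of the new usage on the Scala side (the file
path is hypothetical):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").getOrCreate()

// A DDL-formatted string instead of a StructType; no o.a.spark.sql.types._ import needed.
val df = spark.read
  .schema("col0 INT, col1 DOUBLE")
  .csv("/path/to/data.csv")
```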

Author: Takeshi Yamamuro 

Closes #17719 from maropu/SPARK-20431.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04901dd0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04901dd0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04901dd0

Branch: refs/heads/master
Commit: 04901dd03a3f8062fd39ea38d585935ff71a9248
Parents: 7144b51
Author: Takeshi Yamamuro 
Authored: Thu May 11 11:06:29 2017 -0700
Committer: Xiao Li 
Committed: Thu May 11 11:06:29 2017 -0700

--
 python/pyspark/sql/readwriter.py| 23 +---
 .../org/apache/spark/sql/DataFrameReader.scala  | 12 ++
 .../sql/test/DataFrameReaderWriterSuite.scala   |  9 
 3 files changed, 36 insertions(+), 8 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/04901dd0/python/pyspark/sql/readwriter.py
--
diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 61a6b76..5cf719b 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -96,14 +96,18 @@ class DataFrameReader(OptionUtils):
 By specifying the schema here, the underlying data source can skip the 
schema
 inference step, and thus speed up data loading.
 
-:param schema: a :class:`pyspark.sql.types.StructType` object
+:param schema: a :class:`pyspark.sql.types.StructType` object or a 
DDL-formatted string
+   (For example ``col0 INT, col1 DOUBLE``).
 """
 from pyspark.sql import SparkSession
-if not isinstance(schema, StructType):
-raise TypeError("schema should be StructType")
 spark = SparkSession.builder.getOrCreate()
-jschema = spark._jsparkSession.parseDataType(schema.json())
-self._jreader = self._jreader.schema(jschema)
+if isinstance(schema, StructType):
+jschema = spark._jsparkSession.parseDataType(schema.json())
+self._jreader = self._jreader.schema(jschema)
+elif isinstance(schema, basestring):
+self._jreader = self._jreader.schema(schema)
+else:
+raise TypeError("schema should be StructType or string")
 return self
 
 @since(1.5)
@@ -137,7 +141,8 @@ class DataFrameReader(OptionUtils):
 
 :param path: optional string or a list of string for file-system 
backed data sources.
 :param format: optional string for format of the data source. Default 
to 'parquet'.
-:param schema: optional :class:`pyspark.sql.types.StructType` for the 
input schema.
+:param schema: optional :class:`pyspark.sql.types.StructType` for the 
input schema
+   or a DDL-formatted string (For example ``col0 INT, col1 
DOUBLE``).
 :param options: all other string options
 
 >>> df = 
spark.read.load('python/test_support/sql/parquet_partitioned', opt1=True,
@@ -181,7 +186,8 @@ class DataFrameReader(OptionUtils):
 
 :param path: string represents path to the JSON dataset, or a list of 
paths,
  or RDD of Strings storing JSON objects.
-:param schema: an optional :class:`pyspark.sql.types.StructType` for 
the input schema.
+:param schema: an optional :class:`pyspark.sql.types.StructType` for 
the input schema or
+   a DDL-formatted string (For example ``col0 INT, col1 
DOUBLE``).
 :param primitivesAsString: infers all primitive values as a string 
type. If None is set,
it uses the default value, ``false``.
 :param prefersDecimal: infers all floating-point values as a decimal 
type. If the values
@@ -324,7 +330,8 @@ class DataFrameReader(OptionUtils):
 ``inferSchema`` option or specify the schema explicitly using 
``schema``.
 
 :param path: string, or list of strings, for input path(s).
-:param schema: an optional :class:`pyspark.sql.types.StructType` for 
the input schema.
+:param schema: an optional :class:`pyspark.sql.types.StructType` for 
the input schema
+   or a DDL-formatted string (For example ``col0 INT, col1 
DOUBLE``).
 :param sep: sets the single character as a separator for each 

spark git commit: [SPARK-20600][SS] KafkaRelation should be pretty printed in web UI

2017-05-11 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/master 3aa4e464a -> 7144b5180


[SPARK-20600][SS] KafkaRelation should be pretty printed in web UI

## What changes were proposed in this pull request?

User-friendly name of `KafkaRelation` in web UI (under Details for Query).

### Before

Screenshot: https://cloud.githubusercontent.com/assets/62313/25841955/74479ac6-34a2-11e7-87fb-d9f62a1356a7.png

### After

Screenshot: https://cloud.githubusercontent.com/assets/62313/25841829/f5335630-34a1-11e7-85a4-afe9b66d73c8.png

## How was this patch tested?

Local build

```
./bin/spark-shell --jars 
~/.m2/repository/org/apache/spark/spark-sql-kafka-0-10_2.11/2.3.0-SNAPSHOT/spark-sql-kafka-0-10_2.11-2.3.0-SNAPSHOT.jar
 --packages org.apache.kafka:kafka-clients:0.10.0.1
```
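
The same pattern applies to any custom `BaseRelation`; a hedged sketch (the
relation, its field, and the rendered text are hypothetical, not part of this
patch):

```scala
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.sources.BaseRelation
import org.apache.spark.sql.types.StructType

class MyRelation(val sqlContext: SQLContext, path: String) extends BaseRelation {
  override def schema: StructType = new StructType().add("value", "string")

  // Without this override, the web UI's "Details for Query" falls back to the
  // default object representation instead of a readable node description.
  override def toString: String = s"MyRelation(path=$path)"
}
```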

Author: Jacek Laskowski 

Closes #17917 from jaceklaskowski/SPARK-20600-KafkaRelation-webUI.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7144b518
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7144b518
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7144b518

Branch: refs/heads/master
Commit: 7144b51809aa99ac076786c369389e2330142beb
Parents: 3aa4e46
Author: Jacek Laskowski 
Authored: Thu May 11 10:55:11 2017 -0700
Committer: Shixiong Zhu 
Committed: Thu May 11 10:55:11 2017 -0700

--
 .../main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala  | 3 +++
 1 file changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/7144b518/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
--
diff --git 
a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
 
b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
index 97bd283..7103709 100644
--- 
a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
+++ 
b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
@@ -143,4 +143,7 @@ private[kafka010] class KafkaRelation(
 validateTopicPartitions(partitions, partitionOffsets)
 }
   }
+
+  override def toString: String =
+s"KafkaRelation(strategy=$strategy, start=$startingOffsets, 
end=$endingOffsets)"
 }





spark git commit: [SPARK-20600][SS] KafkaRelation should be pretty printed in web UI

2017-05-11 Thread zsxwing
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 dd9e3b2c9 -> 5844151bc


[SPARK-20600][SS] KafkaRelation should be pretty printed in web UI

## What changes were proposed in this pull request?

User-friendly name of `KafkaRelation` in web UI (under Details for Query).

### Before

Screenshot: https://cloud.githubusercontent.com/assets/62313/25841955/74479ac6-34a2-11e7-87fb-d9f62a1356a7.png

### After

Screenshot: https://cloud.githubusercontent.com/assets/62313/25841829/f5335630-34a1-11e7-85a4-afe9b66d73c8.png

## How was this patch tested?

Local build

```
./bin/spark-shell --jars 
~/.m2/repository/org/apache/spark/spark-sql-kafka-0-10_2.11/2.3.0-SNAPSHOT/spark-sql-kafka-0-10_2.11-2.3.0-SNAPSHOT.jar
 --packages org.apache.kafka:kafka-clients:0.10.0.1
```

Author: Jacek Laskowski 

Closes #17917 from jaceklaskowski/SPARK-20600-KafkaRelation-webUI.

(cherry picked from commit 7144b51809aa99ac076786c369389e2330142beb)
Signed-off-by: Shixiong Zhu 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5844151b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5844151b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5844151b

Branch: refs/heads/branch-2.2
Commit: 5844151bc8e410e7d5b48990bfc9d3c55926f56f
Parents: dd9e3b2
Author: Jacek Laskowski 
Authored: Thu May 11 10:55:11 2017 -0700
Committer: Shixiong Zhu 
Committed: Thu May 11 10:55:31 2017 -0700

--
 .../main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala  | 3 +++
 1 file changed, 3 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/5844151b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
--
diff --git 
a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
 
b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
index 97bd283..7103709 100644
--- 
a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
+++ 
b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
@@ -143,4 +143,7 @@ private[kafka010] class KafkaRelation(
 validateTopicPartitions(partitions, partitionOffsets)
 }
   }
+
+  override def toString: String =
+s"KafkaRelation(strategy=$strategy, start=$startingOffsets, 
end=$endingOffsets)"
 }





spark git commit: [SPARK-20416][SQL] Print UDF names in EXPLAIN

2017-05-11 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 8c67aa7f0 -> 3aa4e464a


[SPARK-20416][SQL] Print UDF names in EXPLAIN

## What changes were proposed in this pull request?
This PR adds `withName` to `UserDefinedFunction` for printing UDF names in
EXPLAIN.

## How was this patch tested?
Added tests in `UDFSuite`.
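
A minimal usage sketch (a local SparkSession is assumed; the UDF and its name are
made up for illustration):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.udf

val spark = SparkSession.builder().master("local[1]").getOrCreate()
import spark.implicits._

// Naming the UDF makes it identifiable in the EXPLAIN output and the web UI plan.
val plusOne = udf((x: Int) => x + 1).withName("plusOne")
Seq(1, 2, 3).toDF("id").select(plusOne($"id")).explain()
```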

Author: Takeshi Yamamuro 

Closes #17712 from maropu/SPARK-20416.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3aa4e464
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3aa4e464
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3aa4e464

Branch: refs/heads/master
Commit: 3aa4e464a8c81994c6b7f76d445640da719af6ed
Parents: 8c67aa7
Author: Takeshi Yamamuro 
Authored: Thu May 11 09:49:05 2017 -0700
Committer: Xiao Li 
Committed: Thu May 11 09:49:05 2017 -0700

--
 .../apache/spark/ml/feature/Bucketizer.scala|  2 +-
 .../org/apache/spark/sql/UDFRegistration.scala  | 50 ++--
 .../sql/expressions/UserDefinedFunction.scala   | 13 +
 .../scala/org/apache/spark/sql/UDFSuite.scala   | 12 +++--
 4 files changed, 46 insertions(+), 31 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/3aa4e464/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
--
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index bb8f2a3..46b512f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -114,7 +114,7 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") 
override val uid: String
 
 val bucketizer: UserDefinedFunction = udf { (feature: Double) =>
   Bucketizer.binarySearchForBuckets($(splits), feature, keepInvalid)
-}
+}.withName("bucketizer")
 
 val newCol = bucketizer(filteredDataset($(inputCol)).cast(DoubleType))
 val newField = prepOutputField(filteredDataset.schema)

http://git-wip-us.apache.org/repos/asf/spark/blob/3aa4e464/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
index 5fd7123..1bceac4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, 
ScalaUDF}
 import org.apache.spark.sql.execution.aggregate.ScalaUDAF
 import org.apache.spark.sql.execution.python.UserDefinedPythonFunction
 import org.apache.spark.sql.expressions.{UserDefinedAggregateFunction, 
UserDefinedFunction}
-import org.apache.spark.sql.types.{DataType, DataTypes}
+import org.apache.spark.sql.types.DataType
 import org.apache.spark.util.Utils
 
 /**
@@ -114,7 +114,7 @@ class UDFRegistration private[sql] (functionRegistry: 
FunctionRegistry) extends
   val inputTypes = Try($inputTypes).toOption
   def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, 
inputTypes.getOrElse(Nil), Some(name), nullable)
   functionRegistry.registerFunction(name, builder)
-  UserDefinedFunction(func, dataType, 
inputTypes).withNullability(nullable)
+  UserDefinedFunction(func, dataType, 
inputTypes).withName(name).withNullability(nullable)
 }""")
 }
 
@@ -147,7 +147,7 @@ class UDFRegistration private[sql] (functionRegistry: 
FunctionRegistry) extends
 val inputTypes = Try(Nil).toOption
 def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, 
inputTypes.getOrElse(Nil), Some(name), nullable)
 functionRegistry.registerFunction(name, builder)
-UserDefinedFunction(func, dataType, inputTypes).withNullability(nullable)
+UserDefinedFunction(func, dataType, 
inputTypes).withName(name).withNullability(nullable)
   }
 
   /**
@@ -160,7 +160,7 @@ class UDFRegistration private[sql] (functionRegistry: 
FunctionRegistry) extends
 val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: 
Nil).toOption
 def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, 
inputTypes.getOrElse(Nil), Some(name), nullable)
 functionRegistry.registerFunction(name, builder)
-UserDefinedFunction(func, dataType, inputTypes).withNullability(nullable)
+UserDefinedFunction(func, dataType, 
inputTypes).withName(name).withNullability(nullable)
   }
 
   /**
@@ -173,7 +173,7 @@ class UDFRegistration private[sql] (functionRegistry: 
FunctionRegistry) 

spark git commit: [SPARK-20311][SQL] Support aliases for table value functions

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master b4c99f436 -> 8c67aa7f0


[SPARK-20311][SQL] Support aliases for table value functions

## What changes were proposed in this pull request?
This PR adds parsing rules to support aliases in table-valued functions.
The previous PR (#17666) was reverted because of a regression. This new
PR fixes the regression and adds tests in `SQLQueryTestSuite`.

## How was this patch tested?
Added tests in `PlanParserSuite` and `SQLQueryTestSuite`.
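
For illustration, a hedged sketch of the alias syntax the new grammar accepts
(a local SparkSession is assumed; `range` is used as the table-valued function):

```scala
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[1]").getOrCreate()

// Alias both the generated relation and its single output column: t(a).
spark.sql("SELECT t.a FROM range(3) AS t(a)").show()
```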

Author: Takeshi Yamamuro 

Closes #17928 from maropu/SPARK-20311-3.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c67aa7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c67aa7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c67aa7f

Branch: refs/heads/master
Commit: 8c67aa7f00e0186abe05a1628faf2232b364a61f
Parents: b4c99f4
Author: Takeshi Yamamuro 
Authored: Thu May 11 18:09:31 2017 +0800
Committer: Wenchen Fan 
Committed: Thu May 11 18:09:31 2017 +0800

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 20 
 .../analysis/ResolveTableValuedFunctions.scala  | 22 --
 .../sql/catalyst/analysis/unresolved.scala  | 10 --
 .../spark/sql/catalyst/parser/AstBuilder.scala  | 17 ---
 .../sql/catalyst/analysis/AnalysisSuite.scala   | 14 -
 .../sql/catalyst/parser/PlanParserSuite.scala   | 13 +++-
 .../resources/sql-tests/inputs/inline-table.sql |  3 ++
 .../sql-tests/inputs/table-valued-functions.sql |  3 ++
 .../sql-tests/results/inline-table.sql.out  | 32 +++-
 .../results/table-valued-functions.sql.out  | 32 +++-
 10 files changed, 147 insertions(+), 19 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/8c67aa7f/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 14c511f..ed5450b 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -472,15 +472,23 @@ identifierComment
 ;
 
 relationPrimary
-: tableIdentifier sample? (AS? strictIdentifier)?   #tableName
-| '(' queryNoWith ')' sample? (AS? strictIdentifier)?   
#aliasedQuery
-| '(' relation ')' sample? (AS? strictIdentifier)?  
#aliasedRelation
-| inlineTable   
#inlineTableDefault2
-| identifier '(' (expression (',' expression)*)? ')'
#tableValuedFunction
+: tableIdentifier sample? (AS? strictIdentifier)?  #tableName
+| '(' queryNoWith ')' sample? (AS? strictIdentifier)?  #aliasedQuery
+| '(' relation ')' sample? (AS? strictIdentifier)? #aliasedRelation
+| inlineTable  #inlineTableDefault2
+| functionTable#tableValuedFunction
 ;
 
 inlineTable
-: VALUES expression (',' expression)*  (AS? identifier identifierList?)?
+: VALUES expression (',' expression)* tableAlias
+;
+
+functionTable
+: identifier '(' (expression (',' expression)*)? ')' tableAlias
+;
+
+tableAlias
+: (AS? strictIdentifier identifierList?)?
 ;
 
 rowFormat

http://git-wip-us.apache.org/repos/asf/spark/blob/8c67aa7f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
index de6de24..dad1340 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
@@ -19,8 +19,8 @@ package org.apache.spark.sql.catalyst.analysis
 
 import java.util.Locale
 
-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Range}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Expression}
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, 
Range}
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.types.{DataType, IntegerType, LongType}
 
@@ -105,7 +105,7 @@ object 

svn commit: r19618 - in /release/spark: spark-1.6.2/ spark-2.0.1/ spark-2.1.0/

2017-05-11 Thread srowen
Author: srowen
Date: Thu May 11 10:08:00 2017
New Revision: 19618

Log:
Delete non-current Spark releases

Removed:
release/spark/spark-1.6.2/
release/spark/spark-2.0.1/
release/spark/spark-2.1.0/





spark-website git commit: More dead link fixing

2017-05-11 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site 62cf4a16d -> 5ed41c8d8


More dead link fixing


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/5ed41c8d
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/5ed41c8d
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/5ed41c8d

Branch: refs/heads/asf-site
Commit: 5ed41c8d8a6bbf03ce84f987ee9c57f6292e1aa6
Parents: 62cf4a1
Author: Sean Owen 
Authored: Thu May 11 11:02:31 2017 +0100
Committer: Sean Owen 
Committed: Thu May 11 11:02:31 2017 +0100

--
 faq.md  |  2 +-
 ...6-21-spark-accepted-into-apache-incubator.md |  2 +-
 news/_posts/2016-05-26-spark-2.0.0-preview.md   |  2 +-
 powered-by.md   |  9 ++---
 site/faq.html   |  2 +-
 site/js/downloads.js| 41 
 site/news/index.html|  4 +-
 site/news/spark-2-1-1-released.html |  1 +
 site/news/spark-2.0.0-preview.html  |  2 +-
 .../spark-accepted-into-apache-incubator.html   |  2 +-
 site/powered-by.html|  9 ++---
 11 files changed, 31 insertions(+), 45 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/faq.md
--
diff --git a/faq.md b/faq.md
index 281d7ca..614664c 100644
--- a/faq.md
+++ b/faq.md
@@ -71,4 +71,4 @@ Please also refer to our
 
 Where can I get more help?
 
-Please post on StackOverflow's https://stackoverflow.com/questions/tagged/apache-spark;>apache-spark
 tag or https://apache-spark-user-list.1001560.n3.nabble.com;>Spark 
Users mailing list.  For more information, please refer to https://spark.apache.org/community.html#have-questions;>Have 
Questions?.  We'll be glad to help!
+Please post on StackOverflow's https://stackoverflow.com/questions/tagged/apache-spark;>apache-spark
 tag or http://apache-spark-user-list.1001560.n3.nabble.com;>Spark 
Users mailing list.  For more information, please refer to https://spark.apache.org/community.html#have-questions;>Have 
Questions?.  We'll be glad to help!

http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md
--
diff --git a/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md 
b/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md
index 1a6ca6d..a0ff02a 100644
--- a/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md
+++ b/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md
@@ -11,4 +11,4 @@ meta:
   _edit_last: '4'
   _wpas_done_all: '1'
 ---
-Spark was recently http://mail-archives.apache.org/mod_mbox/incubator-general/201306.mbox/%3CCDE7B773.E9A48%25chris.a.mattmann%40jpl.nasa.gov%3E;>accepted
 into the http://incubator.apache.org;>Apache Incubator, which 
will serve as the long-term home for the project. While moving the source code 
and issue tracking to Apache will take some time, we are excited to be joining 
the community at Apache. Stay tuned on this site for updates on how the project 
hosting will change.
+Spark was recently http://mail-archives.apache.org/mod_mbox/incubator-general/201306.mbox/%3ccde7b773.e9a48%25chris.a.mattm...@jpl.nasa.gov%3E;>accepted
 into the http://incubator.apache.org;>Apache Incubator, which 
will serve as the long-term home for the project. While moving the source code 
and issue tracking to Apache will take some time, we are excited to be joining 
the community at Apache. Stay tuned on this site for updates on how the project 
hosting will change.

http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/news/_posts/2016-05-26-spark-2.0.0-preview.md
--
diff --git a/news/_posts/2016-05-26-spark-2.0.0-preview.md 
b/news/_posts/2016-05-26-spark-2.0.0-preview.md
index 053beb6..fb642f7 100644
--- a/news/_posts/2016-05-26-spark-2.0.0-preview.md
+++ b/news/_posts/2016-05-26-spark-2.0.0-preview.md
@@ -11,6 +11,6 @@ meta:
   _edit_last: '4'
   _wpas_done_all: '1'
 ---
-To enable wide-scale community testing of the upcoming Spark 2.0 release, the 
Apache Spark team has posted a https://dist.apache.org/repos/dist/release/spark/spark-2.0.0-preview/;>preview
 release of Spark 2.0. This preview is not a stable release in terms of 
either API or functionality, but it is meant to give the community early 
access to try the code that will become Spark 2.0. If you would like to test 
the release, simply download it, and send feedback using either the 

[1/2] spark-website git commit: Replace most http links with https as a best practice, where possible

2017-05-11 Thread srowen
Repository: spark-website
Updated Branches:
  refs/heads/asf-site c2c0905b4 -> 62cf4a16d


http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/site/sitemap.xml
--
diff --git a/site/sitemap.xml b/site/sitemap.xml
index bc93fb7..eb4e705 100644
--- a/site/sitemap.xml
+++ b/site/sitemap.xml
@@ -6,698 +6,698 @@
 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd;>
 
 
-  http://spark.apache.org/
+  https://spark.apache.org/
   daily
   1.0
 
 
 
-  http://spark.apache.org/docs/latest/index.html
+  https://spark.apache.org/docs/latest/index.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/quick-start.html
+  https://spark.apache.org/docs/latest/quick-start.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/programming-guide.html
+  https://spark.apache.org/docs/latest/programming-guide.html
   daily
   1.0
 
 
-  
http://spark.apache.org/docs/latest/streaming-programming-guide.html
+  
https://spark.apache.org/docs/latest/streaming-programming-guide.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/sql-programming-guide.html
+  https://spark.apache.org/docs/latest/sql-programming-guide.html
   daily
   1.0
 
 
-  
http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html
+  
https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/ml-guide.html
+  https://spark.apache.org/docs/latest/ml-guide.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/graphx-programming-guide.html
+  https://spark.apache.org/docs/latest/graphx-programming-guide.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/sparkr.html
+  https://spark.apache.org/docs/latest/sparkr.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/cluster-overview.html
+  https://spark.apache.org/docs/latest/cluster-overview.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/submitting-applications.html
+  https://spark.apache.org/docs/latest/submitting-applications.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/spark-standalone.html
+  https://spark.apache.org/docs/latest/spark-standalone.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/running-on-mesos.html
+  https://spark.apache.org/docs/latest/running-on-mesos.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/running-on-yarn.html
+  https://spark.apache.org/docs/latest/running-on-yarn.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/configuration.html
+  https://spark.apache.org/docs/latest/configuration.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/monitoring.html
+  https://spark.apache.org/docs/latest/monitoring.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/tuning.html
+  https://spark.apache.org/docs/latest/tuning.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/job-scheduling.html
+  https://spark.apache.org/docs/latest/job-scheduling.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/security.html
+  https://spark.apache.org/docs/latest/security.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/hardware-provisioning.html
+  https://spark.apache.org/docs/latest/hardware-provisioning.html
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/building-spark.html
+  https://spark.apache.org/docs/latest/building-spark.html
   daily
   1.0
 
 
 
-  
http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package
+  
https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package
   daily
   1.0
 
 
-  http://spark.apache.org/docs/latest/api/java/index.html
+  https://spark.apache.org/docs/latest/api/java/index.html
   weekly
   1.0
 
 
-  http://spark.apache.org/docs/latest/api/python/index.html
+  https://spark.apache.org/docs/latest/api/python/index.html
   weekly
   1.0
 
 
-  http://spark.apache.org/docs/latest/api/R/index.html
+  https://spark.apache.org/docs/latest/api/R/index.html
   weekly
   1.0
 
 
 
-  http://spark.apache.org/releases/spark-release-2-1-1.html
+  https://spark.apache.org/releases/spark-release-2-1-1.html
   weekly
 
 
-  http://spark.apache.org/news/spark-2-1-1-released.html
+  https://spark.apache.org/news/spark-2-1-1-released.html
   weekly
 
 
-  
http://spark.apache.org/news/spark-summit-june-2017-agenda-posted.html
+  
https://spark.apache.org/news/spark-summit-june-2017-agenda-posted.html
   weekly
 
 
-  
http://spark.apache.org/news/spark-summit-east-2017-agenda-posted.html
+  
https://spark.apache.org/news/spark-summit-east-2017-agenda-posted.html
   weekly
 
 
-  http://spark.apache.org/releases/spark-release-2-1-0.html
+  https://spark.apache.org/releases/spark-release-2-1-0.html
   weekly
 
 
-  http://spark.apache.org/news/spark-2-1-0-released.html
+  

[2/2] spark-website git commit: Replace most http links with https as a best practice, where possible

2017-05-11 Thread srowen
Replace most http links with https as a best practice, where possible


Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/62cf4a16
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/62cf4a16
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/62cf4a16

Branch: refs/heads/asf-site
Commit: 62cf4a16daae3cf1b68745b8f676dbb29c167af2
Parents: c2c0905
Author: Sean Owen 
Authored: Wed May 10 10:56:35 2017 +0100
Committer: Sean Owen 
Committed: Wed May 10 19:02:39 2017 +0100

--
 _config.yml|   2 +-
 community.md   |   8 +-
 contributing.md|  10 +-
 developer-tools.md |   8 +-
 documentation.md   |  40 ++---
 downloads.md   |   4 +-
 examples.md|  10 +-
 faq.md |   6 +-
 index.md   |  12 +-
 mllib/index.md |   4 +-
 powered-by.md  |  12 +-
 release-process.md |   6 +-
 robots.txt |   2 +-
 site/community.html|   8 +-
 site/contributing.html |  10 +-
 site/developer-tools.html  |   8 +-
 site/documentation.html|  40 ++---
 site/downloads.html|   4 +-
 site/examples.html |  10 +-
 site/faq.html  |   6 +-
 site/index.html|  12 +-
 site/mailing-lists.html|   2 +-
 site/mllib/index.html  |   4 +-
 site/powered-by.html   |  15 +-
 site/release-process.html  |   6 +-
 site/robots.txt|   2 +-
 site/sitemap.xml   | 332 ++--
 site/streaming/index.html  |   8 +-
 site/third-party-projects.html |   8 +-
 site/trademarks.html   |   2 +-
 sitemap.xml|  52 +++---
 streaming/index.md |   8 +-
 third-party-projects.md|   8 +-
 trademarks.md  |   2 +-
 34 files changed, 332 insertions(+), 339 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/_config.yml
--
diff --git a/_config.yml b/_config.yml
index 18ba30f..9a3934e 100644
--- a/_config.yml
+++ b/_config.yml
@@ -6,4 +6,4 @@ permalink: none
 destination: site
 exclude: ['README.md','content']
 keep_files: ['docs']
-url: http://spark.apache.org
\ No newline at end of file
+url: https://spark.apache.org
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/community.md
--
diff --git a/community.md b/community.md
index 9fcb2b5..9fc6136 100644
--- a/community.md
+++ b/community.md
@@ -15,18 +15,18 @@ navigation:
 StackOverflow
 
 For usage questions and help (e.g. how to use this Spark API), it is 
recommended you use the 
-StackOverflow tag http://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark`
 
+StackOverflow tag https://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark`
 
 as it is an active forum for Spark users' questions and answers.
 
 Some quick tips when using StackOverflow:
 
 - Prior to asking submitting questions, please:
   - Search StackOverflow's 
-  http://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark`
 tag to see if 
+  https://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark`
 tag to see if 
   your question has already been answered
   - Search the nabble archive for
   http://apache-spark-user-list.1001560.n3.nabble.com/;>us...@spark.apache.org
 
-- Please follow the StackOverflow http://stackoverflow.com/help/how-to-ask;>code of conduct  
+- Please follow the StackOverflow https://stackoverflow.com/help/how-to-ask;>code of conduct  
 - Always use the `apache-spark` tag when asking questions
 - Please also use a secondary tag to specify components so subject matter 
experts can more easily find them.
  Examples include: `pyspark`, `spark-dataframe`, `spark-streaming`, `spark-r`, 
`spark-mllib`, 
@@ -58,7 +58,7 @@ project, and scenarios, it is recommended you use the 
u...@spark.apache.org mail
 Some quick tips when using email:
 
 - Prior to asking submitting questions, please:
-  - Search StackOverflow at http://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark`
 
+  - Search StackOverflow at https://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark`
 
   to see if your question has already been answered
   - Search the nabble archive for
   http://apache-spark-user-list.1001560.n3.nabble.com/;>us...@spark.apache.org
 

http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/contributing.md

spark git commit: [SPARK-20569][SQL] RuntimeReplaceable functions should not take extra parameters

2017-05-11 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 80a57fa90 -> dd9e3b2c9


[SPARK-20569][SQL] RuntimeReplaceable functions should not take extra parameters

## What changes were proposed in this pull request?

`RuntimeReplaceable` always has a constructor with the expression to replace 
with, and this constructor should not be the function builder.

## How was this patch tested?

new regression test
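
To make the guarded behaviour concrete, a minimal Scala sketch mirroring the new regression test below (assuming a SparkSession named `spark`): `nvl` is a `RuntimeReplaceable` whose widest constructor carries the replacement expression, so calling it with an extra argument must now fail analysis instead of silently matching that constructor.

// Hedged sketch, assuming a SparkSession named `spark`; mirrors the new SQLQuerySuite test.
import org.apache.spark.sql.AnalysisException
try {
  spark.sql("SELECT nvl(1, 2, 3)").collect()   // nvl accepts exactly two arguments
} catch {
  case e: AnalysisException =>
    assert(e.message.contains("Invalid number of arguments"))
}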

Author: Wenchen Fan 

Closes #17876 from cloud-fan/minor.

(cherry picked from commit b4c99f43690f8cfba414af90fa2b3998a510bba8)
Signed-off-by: Xiao Li 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dd9e3b2c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dd9e3b2c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dd9e3b2c

Branch: refs/heads/branch-2.2
Commit: dd9e3b2c976a4ef3b4837590a2ba0954cf73860d
Parents: 80a57fa
Author: Wenchen Fan 
Authored: Thu May 11 00:41:15 2017 -0700
Committer: Xiao Li 
Committed: Thu May 11 00:41:35 2017 -0700

--
 .../catalyst/analysis/FunctionRegistry.scala| 20 ++--
 .../org/apache/spark/sql/SQLQuerySuite.scala|  5 +
 2 files changed, 19 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/dd9e3b2c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index e1d83a8..6fc154f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
+import java.lang.reflect.Modifier
+
 import scala.language.existentials
 import scala.reflect.ClassTag
 import scala.util.{Failure, Success, Try}
@@ -455,8 +457,17 @@ object FunctionRegistry {
   private def expression[T <: Expression](name: String)
    (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = {
 
+// For `RuntimeReplaceable`, skip the constructor with most arguments, which is the main
+// constructor and contains non-parameter `child` and should not be used as function builder.
+val constructors = if (classOf[RuntimeReplaceable].isAssignableFrom(tag.runtimeClass)) {
+  val all = tag.runtimeClass.getConstructors
+  val maxNumArgs = all.map(_.getParameterCount).max
+  all.filterNot(_.getParameterCount == maxNumArgs)
+} else {
+  tag.runtimeClass.getConstructors
+}
 // See if we can find a constructor that accepts Seq[Expression]
-val varargCtor = Try(tag.runtimeClass.getDeclaredConstructor(classOf[Seq[_]])).toOption
+val varargCtor = constructors.find(_.getParameterTypes.toSeq == Seq(classOf[Seq[_]]))
 val builder = (expressions: Seq[Expression]) => {
   if (varargCtor.isDefined) {
  // If there is an apply method that accepts Seq[Expression], use that one.
@@ -470,11 +481,8 @@ object FunctionRegistry {
   } else {
  // Otherwise, find a constructor method that matches the number of arguments, and use that.
 val params = Seq.fill(expressions.size)(classOf[Expression])
-val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match {
-  case Success(e) =>
-e
-  case Failure(e) =>
-throw new AnalysisException(s"Invalid number of arguments for function $name")
+val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse {
+  throw new AnalysisException(s"Invalid number of arguments for function $name")
 }
 Try(f.newInstance(expressions : _*).asInstanceOf[Expression]) match {
   case Success(e) => e

http://git-wip-us.apache.org/repos/asf/spark/blob/dd9e3b2c/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 3ecbf96..cd14d24 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2619,4 +2619,9 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   new URL(jarFromInvalidFs)
 }
   }
+
+  test("RuntimeReplaceable functions should not take extra parameters") {
+val e = intercept[AnalysisException](sql("SELECT nvl(1, 2, 3)"))
+assert(e.message.contains("Invalid number of arguments"))
+  }
 }

spark git commit: [SPARK-20569][SQL] RuntimeReplaceable functions should not take extra parameters

2017-05-11 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master 65accb813 -> b4c99f436


[SPARK-20569][SQL] RuntimeReplaceable functions should not take extra parameters

## What changes were proposed in this pull request?

`RuntimeReplaceable` always has a constructor with the expression to replace 
with, and this constructor should not be the function builder.

## How was this patch tested?

new regression test

Author: Wenchen Fan 

Closes #17876 from cloud-fan/minor.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4c99f43
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4c99f43
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4c99f43

Branch: refs/heads/master
Commit: b4c99f43690f8cfba414af90fa2b3998a510bba8
Parents: 65accb8
Author: Wenchen Fan 
Authored: Thu May 11 00:41:15 2017 -0700
Committer: Xiao Li 
Committed: Thu May 11 00:41:15 2017 -0700

--
 .../catalyst/analysis/FunctionRegistry.scala| 20 ++--
 .../org/apache/spark/sql/SQLQuerySuite.scala|  5 +
 2 files changed, 19 insertions(+), 6 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b4c99f43/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
--
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index e1d83a8..6fc154f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.sql.catalyst.analysis
 
+import java.lang.reflect.Modifier
+
 import scala.language.existentials
 import scala.reflect.ClassTag
 import scala.util.{Failure, Success, Try}
@@ -455,8 +457,17 @@ object FunctionRegistry {
   private def expression[T <: Expression](name: String)
    (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = {
 
+// For `RuntimeReplaceable`, skip the constructor with most arguments, which is the main
+// constructor and contains non-parameter `child` and should not be used as function builder.
+val constructors = if (classOf[RuntimeReplaceable].isAssignableFrom(tag.runtimeClass)) {
+  val all = tag.runtimeClass.getConstructors
+  val maxNumArgs = all.map(_.getParameterCount).max
+  all.filterNot(_.getParameterCount == maxNumArgs)
+} else {
+  tag.runtimeClass.getConstructors
+}
 // See if we can find a constructor that accepts Seq[Expression]
-val varargCtor = Try(tag.runtimeClass.getDeclaredConstructor(classOf[Seq[_]])).toOption
+val varargCtor = constructors.find(_.getParameterTypes.toSeq == Seq(classOf[Seq[_]]))
 val builder = (expressions: Seq[Expression]) => {
   if (varargCtor.isDefined) {
  // If there is an apply method that accepts Seq[Expression], use that one.
@@ -470,11 +481,8 @@ object FunctionRegistry {
   } else {
  // Otherwise, find a constructor method that matches the number of arguments, and use that.
 val params = Seq.fill(expressions.size)(classOf[Expression])
-val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match {
-  case Success(e) =>
-e
-  case Failure(e) =>
-throw new AnalysisException(s"Invalid number of arguments for function $name")
+val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse {
+  throw new AnalysisException(s"Invalid number of arguments for function $name")
 }
 Try(f.newInstance(expressions : _*).asInstanceOf[Expression]) match {
   case Success(e) => e

http://git-wip-us.apache.org/repos/asf/spark/blob/b4c99f43/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 3ecbf96..cd14d24 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2619,4 +2619,9 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
   new URL(jarFromInvalidFs)
 }
   }
+
+  test("RuntimeReplaceable functions should not take extra parameters") {
+val e = intercept[AnalysisException](sql("SELECT nvl(1, 2, 3)"))
+assert(e.message.contains("Invalid number of arguments"))
+  }
 }



spark git commit: [SPARK-17029] make toJSON not go through rdd form but operate on dataset always

2017-05-11 Thread wenchen
Repository: spark
Updated Branches:
  refs/heads/master 0698e6c88 -> 65accb813


[SPARK-17029] make toJSON not go through rdd form but operate on dataset always

## What changes were proposed in this pull request?

Don't convert toRdd when doing toJSON
## How was this patch tested?

Existing unit tests
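
For reference, a minimal usage sketch (assuming a SparkSession named `spark`): `toJSON` still returns a `Dataset[String]`, but it is now planned as a `mapPartitions` over the Dataset itself rather than via `queryExecution.toRdd`, so the logical plan no longer contains an RDD scan; the new JsonSuite test below checks exactly that.

// Hedged sketch, assuming a SparkSession named `spark`.
val json = spark.range(3).toDF("id").toJSON
println(json.first())   // e.g. {"id":0}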

Author: Robert Kruszewski 

Closes #14615 from robert3005/robertk/correct-tojson.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/65accb81
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/65accb81
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/65accb81

Branch: refs/heads/master
Commit: 65accb813add9f58c1e9f1555863fe0bb1932ad8
Parents: 0698e6c
Author: Robert Kruszewski 
Authored: Thu May 11 15:26:48 2017 +0800
Committer: Wenchen Fan 
Committed: Thu May 11 15:26:48 2017 +0800

--
 .../src/main/scala/org/apache/spark/sql/Dataset.scala |  8 +++-
 .../spark/sql/execution/datasources/json/JsonSuite.scala  | 10 ++
 2 files changed, 13 insertions(+), 5 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/65accb81/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 61154e2..c75921e 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -2806,7 +2806,7 @@ class Dataset[T] private[sql](
   def toJSON: Dataset[String] = {
 val rowSchema = this.schema
 val sessionLocalTimeZone = sparkSession.sessionState.conf.sessionLocalTimeZone
-val rdd: RDD[String] = queryExecution.toRdd.mapPartitions { iter =>
+mapPartitions { iter =>
   val writer = new CharArrayWriter()
   // create the Generator without separator inserted between 2 records
   val gen = new JacksonGenerator(rowSchema, writer,
@@ -2815,7 +2815,7 @@ class Dataset[T] private[sql](
   new Iterator[String] {
 override def hasNext: Boolean = iter.hasNext
 override def next(): String = {
-  gen.write(iter.next())
+  gen.write(exprEnc.toRow(iter.next()))
   gen.flush()
 
   val json = writer.toString
@@ -2828,9 +2828,7 @@ class Dataset[T] private[sql](
   json
 }
   }
-}
-import sparkSession.implicits.newStringEncoder
-sparkSession.createDataset(rdd)
+} (Encoders.STRING)
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/spark/blob/65accb81/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
--
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
index 2ab0381..5e7f794 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala
@@ -31,6 +31,7 @@ import org.apache.spark.SparkException
 import org.apache.spark.sql.{functions => F, _}
 import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions}
 import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.execution.ExternalRDD
 import org.apache.spark.sql.execution.datasources.DataSource
 import org.apache.spark.sql.execution.datasources.json.JsonInferSchema.compatibleType
 import org.apache.spark.sql.internal.SQLConf
@@ -1326,6 +1327,15 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData {
 )
   }
 
+  test("Dataset toJSON doesn't construct rdd") {
+val containsRDD = spark.emptyDataFrame.toJSON.queryExecution.logical.find {
+  case ExternalRDD(_, _) => true
+  case _ => false
+}
+
+assert(containsRDD.isEmpty, "Expected logical plan of toJSON to not contain an RDD")
+  }
+
   test("JSONRelation equality test") {
 withTempPath(dir => {
   val path = dir.getCanonicalFile.toURI.toString





spark git commit: [SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for ML"

2017-05-11 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 3eb0ee06a -> 80a57fa90


[SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for ML"

This reverts commit b8733e0ad9f5a700f385e210450fd2c10137293e.

Author: Yanbo Liang 

Closes #17944 from yanboliang/spark-20606-revert.

(cherry picked from commit 0698e6c88ca11fdfd6e5498cab784cf6dbcdfacb)
Signed-off-by: Yanbo Liang 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80a57fa9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80a57fa9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80a57fa9

Branch: refs/heads/branch-2.2
Commit: 80a57fa90be8dca4340345c09b4ea28fbf11a516
Parents: 3eb0ee0
Author: Yanbo Liang 
Authored: Thu May 11 14:48:13 2017 +0800
Committer: Yanbo Liang 
Committed: Thu May 11 14:48:26 2017 +0800

--
 .../classification/DecisionTreeClassifier.scala |  18 ++--
 .../spark/ml/classification/GBTClassifier.scala |  24 ++---
 .../classification/RandomForestClassifier.scala |  24 ++---
 .../ml/regression/DecisionTreeRegressor.scala   |  18 ++--
 .../spark/ml/regression/GBTRegressor.scala  |  24 ++---
 .../ml/regression/RandomForestRegressor.scala   |  24 ++---
 .../org/apache/spark/ml/tree/treeParams.scala   | 105 +++
 .../org/apache/spark/ml/util/ReadWrite.scala|  16 +++
 project/MimaExcludes.scala  |  68 
 python/pyspark/ml/util.py   |  32 ++
 10 files changed, 219 insertions(+), 134 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/80a57fa9/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 5fb105c..9f60f08 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -54,27 +54,27 @@ class DecisionTreeClassifier @Since("1.4.0") (
 
   /** @group setParam */
   @Since("1.4.0")
-  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+  override def setMaxDepth(value: Int): this.type = set(maxDepth, value)
 
   /** @group setParam */
   @Since("1.4.0")
-  def setMaxBins(value: Int): this.type = set(maxBins, value)
+  override def setMaxBins(value: Int): this.type = set(maxBins, value)
 
   /** @group setParam */
   @Since("1.4.0")
-  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+  override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
 
   /** @group setParam */
   @Since("1.4.0")
-  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+  override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
 
   /** @group expertSetParam */
   @Since("1.4.0")
-  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+  override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
 
   /** @group expertSetParam */
   @Since("1.4.0")
-  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+  override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
 
   /**
* Specifies how often to checkpoint the cached node IDs.
@@ -86,15 +86,15 @@ class DecisionTreeClassifier @Since("1.4.0") (
* @group setParam
*/
   @Since("1.4.0")
-  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+  override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
 
   /** @group setParam */
   @Since("1.4.0")
-  def setImpurity(value: String): this.type = set(impurity, value)
+  override def setImpurity(value: String): this.type = set(impurity, value)
 
   /** @group setParam */
   @Since("1.6.0")
-  def setSeed(value: Long): this.type = set(seed, value)
+  override def setSeed(value: Long): this.type = set(seed, value)
 
  override def train(dataset: Dataset[_]): DecisionTreeClassificationModel = {
 val categoricalFeatures: Map[Int, Int] =

http://git-wip-us.apache.org/repos/asf/spark/blob/80a57fa9/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala 
b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index 263ed10..ade0960 100644
--- 

spark git commit: [SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for ML"

2017-05-11 Thread yliang
Repository: spark
Updated Branches:
  refs/heads/master 8ddbc431d -> 0698e6c88


[SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for ML"

This reverts commit b8733e0ad9f5a700f385e210450fd2c10137293e.

Author: Yanbo Liang 

Closes #17944 from yanboliang/spark-20606-revert.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0698e6c8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0698e6c8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0698e6c8

Branch: refs/heads/master
Commit: 0698e6c88ca11fdfd6e5498cab784cf6dbcdfacb
Parents: 8ddbc43
Author: Yanbo Liang 
Authored: Thu May 11 14:48:13 2017 +0800
Committer: Yanbo Liang 
Committed: Thu May 11 14:48:13 2017 +0800

--
 .../classification/DecisionTreeClassifier.scala |  18 ++--
 .../spark/ml/classification/GBTClassifier.scala |  24 ++---
 .../classification/RandomForestClassifier.scala |  24 ++---
 .../ml/regression/DecisionTreeRegressor.scala   |  18 ++--
 .../spark/ml/regression/GBTRegressor.scala  |  24 ++---
 .../ml/regression/RandomForestRegressor.scala   |  24 ++---
 .../org/apache/spark/ml/tree/treeParams.scala   | 105 +++
 .../org/apache/spark/ml/util/ReadWrite.scala|  16 +++
 project/MimaExcludes.scala  |  68 
 python/pyspark/ml/util.py   |  32 ++
 10 files changed, 219 insertions(+), 134 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/0698e6c8/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 5fb105c..9f60f08 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -54,27 +54,27 @@ class DecisionTreeClassifier @Since("1.4.0") (
 
   /** @group setParam */
   @Since("1.4.0")
-  def setMaxDepth(value: Int): this.type = set(maxDepth, value)
+  override def setMaxDepth(value: Int): this.type = set(maxDepth, value)
 
   /** @group setParam */
   @Since("1.4.0")
-  def setMaxBins(value: Int): this.type = set(maxBins, value)
+  override def setMaxBins(value: Int): this.type = set(maxBins, value)
 
   /** @group setParam */
   @Since("1.4.0")
-  def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
+  override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value)
 
   /** @group setParam */
   @Since("1.4.0")
-  def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
+  override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value)
 
   /** @group expertSetParam */
   @Since("1.4.0")
-  def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
+  override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value)
 
   /** @group expertSetParam */
   @Since("1.4.0")
-  def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
+  override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value)
 
   /**
* Specifies how often to checkpoint the cached node IDs.
@@ -86,15 +86,15 @@ class DecisionTreeClassifier @Since("1.4.0") (
* @group setParam
*/
   @Since("1.4.0")
-  def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
+  override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value)
 
   /** @group setParam */
   @Since("1.4.0")
-  def setImpurity(value: String): this.type = set(impurity, value)
+  override def setImpurity(value: String): this.type = set(impurity, value)
 
   /** @group setParam */
   @Since("1.6.0")
-  def setSeed(value: Long): this.type = set(seed, value)
+  override def setSeed(value: Long): this.type = set(seed, value)
 
  override protected def train(dataset: Dataset[_]): DecisionTreeClassificationModel = {
 val categoricalFeatures: Map[Int, Int] =

http://git-wip-us.apache.org/repos/asf/spark/blob/0698e6c8/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
--
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala 
b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index 263ed10..ade0960 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++