spark git commit: [SPARK-20718][SQL] FileSourceScanExec with different filter orders should be the same after canonicalization
Repository: spark
Updated Branches: refs/heads/branch-2.2 2cac317a8 -> a8d981dc5

[SPARK-20718][SQL] FileSourceScanExec with different filter orders should be the same after canonicalization

## What changes were proposed in this pull request?

Since `constraints` in `QueryPlan` is a set, the order of filters can differ. Usually this is OK because of canonicalization. However, in `FileSourceScanExec` the data filters and partition filters are sequences, and their orders are not canonicalized, so `def sameResult` returns different results for different orders of data/partition filters. This leads to, e.g., different decisions for `ReuseExchange`, and thus results in unstable performance.

## How was this patch tested?

Added a new test for `FileSourceScanExec.sameResult`.

Author: wangzhenhua

Closes #17959 from wzhfy/canonicalizeFileSourceScanExec.

(cherry picked from commit c8da5356000c8e4ff9141e4a2892ebe0b9641d63)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8d981dc
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8d981dc
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8d981dc
Branch: refs/heads/branch-2.2
Commit: a8d981dc5d11d65a4bd3a68aa57455b34a2649f9
Parents: 2cac317
Author: wangzhenhua
Authored: Fri May 12 13:42:48 2017 +0800
Committer: Wenchen Fan
Committed: Fri May 12 13:43:04 2017 +0800

--
 .../sql/execution/DataSourceScanExec.scala    | 16 +--
 .../spark/sql/execution/SameResultSuite.scala | 49
 2 files changed, 62 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a8d981dc/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
--

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index 866fa98..251098c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils

-trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
+trait DataSourceScanExec extends LeafExecNode with CodegenSupport with PredicateHelper {
   val relation: BaseRelation
   val metastoreTableIdentifier: Option[TableIdentifier]

@@ -519,8 +519,18 @@ case class FileSourceScanExec(
       relation,
       output.map(QueryPlan.normalizeExprId(_, output)),
       requiredSchema,
-      partitionFilters.map(QueryPlan.normalizeExprId(_, output)),
-      dataFilters.map(QueryPlan.normalizeExprId(_, output)),
+      canonicalizeFilters(partitionFilters, output),
+      canonicalizeFilters(dataFilters, output),
       None)
   }
+
+  private def canonicalizeFilters(filters: Seq[Expression], output: Seq[Attribute])
+    : Seq[Expression] = {
+    if (filters.nonEmpty) {
+      val normalizedFilters = QueryPlan.normalizeExprId(filters.reduce(And), output)
+      splitConjunctivePredicates(normalizedFilters)
+    } else {
+      Nil
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/a8d981dc/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
--

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
new file mode 100644
index 000..25e4ca0
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
spark git commit: [SPARK-20718][SQL] FileSourceScanExec with different filter orders should be the same after canonicalization
Repository: spark
Updated Branches: refs/heads/master 2b36eb696 -> c8da53560

[SPARK-20718][SQL] FileSourceScanExec with different filter orders should be the same after canonicalization

## What changes were proposed in this pull request?

Since `constraints` in `QueryPlan` is a set, the order of filters can differ. Usually this is OK because of canonicalization. However, in `FileSourceScanExec` the data filters and partition filters are sequences, and their orders are not canonicalized, so `def sameResult` returns different results for different orders of data/partition filters. This leads to, e.g., different decisions for `ReuseExchange`, and thus results in unstable performance.

## How was this patch tested?

Added a new test for `FileSourceScanExec.sameResult`.

Author: wangzhenhua

Closes #17959 from wzhfy/canonicalizeFileSourceScanExec.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8da5356
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8da5356
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8da5356
Branch: refs/heads/master
Commit: c8da5356000c8e4ff9141e4a2892ebe0b9641d63
Parents: 2b36eb6
Author: wangzhenhua
Authored: Fri May 12 13:42:48 2017 +0800
Committer: Wenchen Fan
Committed: Fri May 12 13:42:48 2017 +0800

--
 .../sql/execution/DataSourceScanExec.scala    | 16 +--
 .../spark/sql/execution/SameResultSuite.scala | 49
 2 files changed, 62 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c8da5356/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
--

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
index 866fa98..251098c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.sources.BaseRelation
 import org.apache.spark.sql.types.StructType
 import org.apache.spark.util.Utils

-trait DataSourceScanExec extends LeafExecNode with CodegenSupport {
+trait DataSourceScanExec extends LeafExecNode with CodegenSupport with PredicateHelper {
   val relation: BaseRelation
   val metastoreTableIdentifier: Option[TableIdentifier]

@@ -519,8 +519,18 @@ case class FileSourceScanExec(
       relation,
       output.map(QueryPlan.normalizeExprId(_, output)),
       requiredSchema,
-      partitionFilters.map(QueryPlan.normalizeExprId(_, output)),
-      dataFilters.map(QueryPlan.normalizeExprId(_, output)),
+      canonicalizeFilters(partitionFilters, output),
+      canonicalizeFilters(dataFilters, output),
       None)
   }
+
+  private def canonicalizeFilters(filters: Seq[Expression], output: Seq[Attribute])
+    : Seq[Expression] = {
+    if (filters.nonEmpty) {
+      val normalizedFilters = QueryPlan.normalizeExprId(filters.reduce(And), output)
+      splitConjunctivePredicates(normalizedFilters)
+    } else {
+      Nil
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/c8da5356/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
--

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
new file mode 100644
index 000..25e4ca0
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/SameResultSuite.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution
+
+import org.apache.spark.sql.{DataFrame, QueryTest}
+import org.apache.spark.sql.test.SharedSQLContext
+
+/**
+ * Tests for the
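To make the effect of the canonicalization concrete, here is a minimal spark-shell sketch (the Parquet path, table shape, and column names are illustrative, not taken from the patch): two scans whose conjunctive filters arrive in different orders should now compare as equal via `sameResult`.

```
import org.apache.spark.sql.SparkSession

val spark = SparkSession.builder().master("local[2]").appName("SameResultDemo").getOrCreate()
import spark.implicits._

// Write a small Parquet table so the reads go through FileSourceScanExec.
Seq((1, 10), (2, 20)).toDF("a", "b").write.mode("overwrite").parquet("/tmp/same_result_demo")

// The same conjunctive predicate, written in two different orders.
val plan1 = spark.read.parquet("/tmp/same_result_demo")
  .where("a = 1 AND b = 10").queryExecution.executedPlan
val plan2 = spark.read.parquet("/tmp/same_result_demo")
  .where("b = 10 AND a = 1").queryExecution.executedPlan

// Before this patch the filter sequences were canonicalized in their
// original orders, so this comparison could return false. With the fix the
// filters are reduced with And, normalized, and re-split into conjunctive
// predicates, making the comparison order-insensitive.
println(plan1.sameResult(plan2))  // expected: true
```

The new `SameResultSuite` added by the patch exercises the same idea against `FileSourceScanExec` directly.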
spark git commit: [SPARK-20665][SQL] "Bround" and "Round" functions return NULL
Repository: spark
Updated Branches: refs/heads/branch-2.0 d86dae8fe -> b2d0ed287

[SPARK-20665][SQL] "Bround" and "Round" functions return NULL

spark-sql> select bround(12.3, 2);
spark-sql> NULL

For this case, the expected result is 12.3, but it is NULL. When the second parameter (the target scale) is bigger than the scale of the decimal value, the result is not what we expect. The "round" function has the same problem. This PR solves the problem for both of them.

Tested with unit test cases in MathExpressionsSuite and MathFunctionsSuite.

Author: liuxian

Closes #17906 from 10110346/wip_lx_0509.

(cherry picked from commit 2b36eb696f6c738e1328582630755aaac4293460)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b2d0ed28
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b2d0ed28
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b2d0ed28
Branch: refs/heads/branch-2.0
Commit: b2d0ed2875fcc90a3ac70e857eb42bce9055e6d6
Parents: d86dae8
Author: liuxian
Authored: Fri May 12 11:38:50 2017 +0800
Committer: Wenchen Fan
Committed: Fri May 12 11:43:21 2017 +0800

--
 .../sql/catalyst/expressions/mathExpressions.scala    | 12 ++--
 .../sql/catalyst/expressions/MathFunctionsSuite.scala |  7 +++
 .../org/apache/spark/sql/MathExpressionsSuite.scala   | 13 +
 3 files changed, 22 insertions(+), 10 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/b2d0ed28/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index 591e1e5..c7dfeb7 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -845,10 +845,10 @@ abstract class RoundBase(child: Expression, scale: Expression,

   // not overriding since _scale is a constant int at runtime
   def nullSafeEval(input1: Any): Any = {
-    child.dataType match {
-      case _: DecimalType =>
+    dataType match {
+      case DecimalType.Fixed(_, s) =>
         val decimal = input1.asInstanceOf[Decimal]
-        if (decimal.changePrecision(decimal.precision, _scale, mode)) decimal else null
+        if (decimal.changePrecision(decimal.precision, s, mode)) decimal else null
       case ByteType =>
         BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte
       case ShortType =>
@@ -877,10 +877,10 @@ abstract class RoundBase(child: Expression, scale: Expression,

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val ce = child.genCode(ctx)
-    val evaluationCode = child.dataType match {
-      case _: DecimalType =>
+    val evaluationCode = dataType match {
+      case DecimalType.Fixed(_, s) =>
         s"""
-        if (${ce.value}.changePrecision(${ce.value}.precision(), ${_scale},
+        if (${ce.value}.changePrecision(${ce.value}.precision(), ${s},
           java.math.BigDecimal.${modeStr})) {
           ${ev.value} = ${ce.value};
         } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/b2d0ed28/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
--

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
index f88c9e8..a08db2f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
@@ -546,15 +546,14 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1),
       BigDecimal(3.14), BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159),
       BigDecimal(3.141593), BigDecimal(3.1415927))
-    // round_scale > current_scale would result in precision increase
-    // and not allowed by o.a.s.s.types.Decimal.changePrecision, therefore null
+
     (0 to 7).foreach { i =>
       checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow)
       checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow)
     }
     (8 to 10).foreach { scale =>
-      checkEvaluation(Round(bdPi, scale), null, EmptyRow)
-
spark git commit: [SPARK-20665][SQL] "Bround" and "Round" functions return NULL
Repository: spark
Updated Branches: refs/heads/branch-2.1 92a71a667 -> 6e89d5740

[SPARK-20665][SQL] "Bround" and "Round" functions return NULL

spark-sql> select bround(12.3, 2);
spark-sql> NULL

For this case, the expected result is 12.3, but it is NULL. When the second parameter (the target scale) is bigger than the scale of the decimal value, the result is not what we expect. The "round" function has the same problem. This PR solves the problem for both of them.

Tested with unit test cases in MathExpressionsSuite and MathFunctionsSuite.

Author: liuxian

Closes #17906 from 10110346/wip_lx_0509.

(cherry picked from commit 2b36eb696f6c738e1328582630755aaac4293460)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6e89d574
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6e89d574
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6e89d574
Branch: refs/heads/branch-2.1
Commit: 6e89d574058bc2b96b14a691a07580be67f63707
Parents: 92a71a6
Author: liuxian
Authored: Fri May 12 11:38:50 2017 +0800
Committer: Wenchen Fan
Committed: Fri May 12 11:41:40 2017 +0800

--
 .../sql/catalyst/expressions/mathExpressions.scala     | 12 ++--
 .../catalyst/expressions/MathExpressionsSuite.scala    |  7 +++
 .../org/apache/spark/sql/MathFunctionsSuite.scala      | 13 +
 3 files changed, 22 insertions(+), 10 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/6e89d574/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index 65273a7..54b8457 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1021,10 +1021,10 @@ abstract class RoundBase(child: Expression, scale: Expression,

   // not overriding since _scale is a constant int at runtime
   def nullSafeEval(input1: Any): Any = {
-    child.dataType match {
-      case _: DecimalType =>
+    dataType match {
+      case DecimalType.Fixed(_, s) =>
         val decimal = input1.asInstanceOf[Decimal]
-        if (decimal.changePrecision(decimal.precision, _scale, mode)) decimal else null
+        if (decimal.changePrecision(decimal.precision, s, mode)) decimal else null
       case ByteType =>
         BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte
       case ShortType =>
@@ -1053,10 +1053,10 @@ abstract class RoundBase(child: Expression, scale: Expression,

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val ce = child.genCode(ctx)
-    val evaluationCode = child.dataType match {
-      case _: DecimalType =>
+    val evaluationCode = dataType match {
+      case DecimalType.Fixed(_, s) =>
         s"""
-        if (${ce.value}.changePrecision(${ce.value}.precision(), ${_scale},
+        if (${ce.value}.changePrecision(${ce.value}.precision(), ${s},
           java.math.BigDecimal.${modeStr})) {
           ${ev.value} = ${ce.value};
         } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/6e89d574/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
--

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
index 6b5bfac..1555dd1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
@@ -546,15 +546,14 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1),
       BigDecimal(3.14), BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159),
       BigDecimal(3.141593), BigDecimal(3.1415927))
-    // round_scale > current_scale would result in precision increase
-    // and not allowed by o.a.s.s.types.Decimal.changePrecision, therefore null
+
     (0 to 7).foreach { i =>
       checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow)
       checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow)
     }
     (8 to 10).foreach { scale =>
-      checkEvaluation(Round(bdPi, scale), null,
spark git commit: [SPARK-20665][SQL] "Bround" and "Round" functions return NULL
Repository: spark
Updated Branches: refs/heads/branch-2.2 3d1908fd5 -> 2cac317a8

[SPARK-20665][SQL] "Bround" and "Round" functions return NULL

## What changes were proposed in this pull request?

spark-sql> select bround(12.3, 2);
spark-sql> NULL

For this case, the expected result is 12.3, but it is NULL. When the second parameter (the target scale) is bigger than the scale of the decimal value, the result is not what we expect. The "round" function has the same problem. This PR solves the problem for both of them.

## How was this patch tested?

Unit test cases in MathExpressionsSuite and MathFunctionsSuite.

Author: liuxian

Closes #17906 from 10110346/wip_lx_0509.

(cherry picked from commit 2b36eb696f6c738e1328582630755aaac4293460)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2cac317a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2cac317a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2cac317a
Branch: refs/heads/branch-2.2
Commit: 2cac317a84a234f034b0c75dcb5e4c27860a4cc0
Parents: 3d1908f
Author: liuxian
Authored: Fri May 12 11:38:50 2017 +0800
Committer: Wenchen Fan
Committed: Fri May 12 11:39:02 2017 +0800

--
 .../sql/catalyst/expressions/mathExpressions.scala     | 12 ++--
 .../catalyst/expressions/MathExpressionsSuite.scala    |  7 +++
 .../org/apache/spark/sql/MathFunctionsSuite.scala      | 13 +
 3 files changed, 22 insertions(+), 10 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2cac317a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index c4d47ab..de1a46d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1023,10 +1023,10 @@ abstract class RoundBase(child: Expression, scale: Expression,

   // not overriding since _scale is a constant int at runtime
   def nullSafeEval(input1: Any): Any = {
-    child.dataType match {
-      case _: DecimalType =>
+    dataType match {
+      case DecimalType.Fixed(_, s) =>
         val decimal = input1.asInstanceOf[Decimal]
-        decimal.toPrecision(decimal.precision, _scale, mode).orNull
+        decimal.toPrecision(decimal.precision, s, mode).orNull
       case ByteType =>
         BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte
       case ShortType =>
@@ -1055,10 +1055,10 @@ abstract class RoundBase(child: Expression, scale: Expression,

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val ce = child.genCode(ctx)
-    val evaluationCode = child.dataType match {
-      case _: DecimalType =>
+    val evaluationCode = dataType match {
+      case DecimalType.Fixed(_, s) =>
         s"""
-        if (${ce.value}.changePrecision(${ce.value}.precision(), ${_scale},
+        if (${ce.value}.changePrecision(${ce.value}.precision(), ${s},
           java.math.BigDecimal.${modeStr})) {
           ${ev.value} = ${ce.value};
         } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/2cac317a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
--

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
index 6b5bfac..1555dd1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
@@ -546,15 +546,14 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1),
       BigDecimal(3.14), BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159),
       BigDecimal(3.141593), BigDecimal(3.1415927))
-    // round_scale > current_scale would result in precision increase
-    // and not allowed by o.a.s.s.types.Decimal.changePrecision, therefore null
+
     (0 to 7).foreach { i =>
       checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow)
       checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow)
     }
     (8 to 10).foreach { scale =>
-
spark git commit: [SPARK-20665][SQL] "Bround" and "Round" functions return NULL
Repository: spark
Updated Branches: refs/heads/master 609ba5f2b -> 2b36eb696

[SPARK-20665][SQL] "Bround" and "Round" functions return NULL

## What changes were proposed in this pull request?

spark-sql> select bround(12.3, 2);
spark-sql> NULL

For this case, the expected result is 12.3, but it is NULL. When the second parameter (the target scale) is bigger than the scale of the decimal value, the result is not what we expect. The "round" function has the same problem. This PR solves the problem for both of them.

## How was this patch tested?

Unit test cases in MathExpressionsSuite and MathFunctionsSuite.

Author: liuxian

Closes #17906 from 10110346/wip_lx_0509.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2b36eb69
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2b36eb69
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2b36eb69
Branch: refs/heads/master
Commit: 2b36eb696f6c738e1328582630755aaac4293460
Parents: 609ba5f
Author: liuxian
Authored: Fri May 12 11:38:50 2017 +0800
Committer: Wenchen Fan
Committed: Fri May 12 11:38:50 2017 +0800

--
 .../sql/catalyst/expressions/mathExpressions.scala     | 12 ++--
 .../catalyst/expressions/MathExpressionsSuite.scala    |  7 +++
 .../org/apache/spark/sql/MathFunctionsSuite.scala      | 13 +
 3 files changed, 22 insertions(+), 10 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/2b36eb69/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
index c4d47ab..de1a46d 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/mathExpressions.scala
@@ -1023,10 +1023,10 @@ abstract class RoundBase(child: Expression, scale: Expression,

   // not overriding since _scale is a constant int at runtime
   def nullSafeEval(input1: Any): Any = {
-    child.dataType match {
-      case _: DecimalType =>
+    dataType match {
+      case DecimalType.Fixed(_, s) =>
         val decimal = input1.asInstanceOf[Decimal]
-        decimal.toPrecision(decimal.precision, _scale, mode).orNull
+        decimal.toPrecision(decimal.precision, s, mode).orNull
       case ByteType =>
         BigDecimal(input1.asInstanceOf[Byte]).setScale(_scale, mode).toByte
       case ShortType =>
@@ -1055,10 +1055,10 @@ abstract class RoundBase(child: Expression, scale: Expression,

   override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
     val ce = child.genCode(ctx)
-    val evaluationCode = child.dataType match {
-      case _: DecimalType =>
+    val evaluationCode = dataType match {
+      case DecimalType.Fixed(_, s) =>
         s"""
-        if (${ce.value}.changePrecision(${ce.value}.precision(), ${_scale},
+        if (${ce.value}.changePrecision(${ce.value}.precision(), ${s},
           java.math.BigDecimal.${modeStr})) {
           ${ev.value} = ${ce.value};
         } else {

http://git-wip-us.apache.org/repos/asf/spark/blob/2b36eb69/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
--

diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
index 6b5bfac..1555dd1 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathExpressionsSuite.scala
@@ -546,15 +546,14 @@ class MathExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper {
     val bdResults: Seq[BigDecimal] = Seq(BigDecimal(3.0), BigDecimal(3.1),
       BigDecimal(3.14), BigDecimal(3.142), BigDecimal(3.1416), BigDecimal(3.14159),
       BigDecimal(3.141593), BigDecimal(3.1415927))
-    // round_scale > current_scale would result in precision increase
-    // and not allowed by o.a.s.s.types.Decimal.changePrecision, therefore null
+
     (0 to 7).foreach { i =>
       checkEvaluation(Round(bdPi, i), bdResults(i), EmptyRow)
       checkEvaluation(BRound(bdPi, i), bdResults(i), EmptyRow)
     }
     (8 to 10).foreach { scale =>
-      checkEvaluation(Round(bdPi, scale), null, EmptyRow)
-      checkEvaluation(BRound(bdPi, scale), null, EmptyRow)
+
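For a quick check of the behavior change, a spark-shell sketch (the literal's inferred type, `DECIMAL(3,1)`, is standard Spark behavior rather than something this patch changes):

```
// Before this patch:
//   spark-sql> SELECT bround(12.3, 2);   -- returned NULL
// The literal 12.3 parses as DECIMAL(3,1); asking for scale 2 used to fail
// changePrecision against the input's scale and yield NULL. The fix consults
// the result type's scale instead, so the value comes back unchanged.
spark.sql("SELECT bround(12.3, 2), round(12.3, 2)").show()
// expected after the fix: 12.3 for both, instead of NULL
```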
spark git commit: [SPARK-20399][SQL] Add a config to fall back to string literal parsing consistent with the old SQL parser behavior
Repository: spark
Updated Branches: refs/heads/master 04901dd03 -> 609ba5f2b

[SPARK-20399][SQL] Add a config to fall back to string literal parsing consistent with the old SQL parser behavior

## What changes were proposed in this pull request?

The new SQL parser was introduced in Spark 2.0, and all string literals are unescaped in the parser. This seems to bring an issue regarding regex pattern strings. The following code reproduces it:

    val data = Seq("\u0020\u0021\u0023", "abc")
    val df = data.toDF()

    // 1st usage: works in 1.6
    // Let parser parse pattern string
    val rlike1 = df.filter("value rlike '^\\x20[\\x20-\\x23]+$'")

    // 2nd usage: works in 1.6, 2.x
    // Call Column.rlike so the pattern string is a literal which doesn't go through parser
    val rlike2 = df.filter($"value".rlike("^\\x20[\\x20-\\x23]+$"))

    // In 2.x, we need to add backslashes to make the regex pattern parse correctly
    val rlike3 = df.filter("value rlike '^\\\\x20[\\\\x20-\\\\x23]+$'")

Following the discussion in #17736, this patch adds a config to fall back to 1.6 string literal parsing and mitigate the migration issue.

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Liang-Chi Hsieh

Closes #17887 from viirya/add-config-fallback-string-parsing.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/609ba5f2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/609ba5f2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/609ba5f2
Branch: refs/heads/master
Commit: 609ba5f2b9fd89b1b9971d08f7cc680d202dbc7c
Parents: 04901dd
Author: Liang-Chi Hsieh
Authored: Fri May 12 11:15:10 2017 +0800
Committer: Wenchen Fan
Committed: Fri May 12 11:15:10 2017 +0800

--
 .../sql/catalyst/catalog/SessionCatalog.scala   |   2 +-
 .../expressions/regexpExpressions.scala         |  33 -
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  11 +-
 .../spark/sql/catalyst/parser/ParseDriver.scala |   8 +-
 .../spark/sql/catalyst/parser/ParserUtils.scala |   6 +
 .../org/apache/spark/sql/internal/SQLConf.scala |  10 ++
 .../catalyst/parser/ExpressionParserSuite.scala | 128 +--
 .../spark/sql/execution/SparkSqlParser.scala    |   2 +-
 .../org/apache/spark/sql/DatasetSuite.scala     |  13 ++
 9 files changed, 171 insertions(+), 42 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/609ba5f2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 18e5146..f6653d3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -73,7 +73,7 @@ class SessionCatalog(
       functionRegistry,
       conf,
       new Configuration(),
-      CatalystSqlParser,
+      new CatalystSqlParser(conf),
       DummyFunctionResourceLoader)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/609ba5f2/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 3fa8458..aa5a1b5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -86,6 +86,13 @@ abstract class StringRegexExpression extends BinaryExpression
     escape character, the following character is matched literally. It is invalid to escape
     any other character.
+
+    Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order
+    to match "\abc", the pattern should be "\\abc".
+
+    When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it fallbacks
+    to Spark 1.6 behavior regarding string literal parsing. For example, if the config is
+    enabled, the pattern to match "\abc" should be "\abc".
+
     Examples:
       > SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%'
       true
@@ -144,7 +151,31 @@ case class Like(left: Expression, right: Expression) extends StringRegexExpressi
   }

 @ExpressionDescription(
-
spark git commit: [SPARK-20399][SQL] Add a config to fall back to string literal parsing consistent with the old SQL parser behavior
Repository: spark
Updated Branches: refs/heads/branch-2.2 5844151bc -> 3d1908fd5

[SPARK-20399][SQL] Add a config to fall back to string literal parsing consistent with the old SQL parser behavior

## What changes were proposed in this pull request?

The new SQL parser was introduced in Spark 2.0, and all string literals are unescaped in the parser. This seems to bring an issue regarding regex pattern strings. The following code reproduces it:

    val data = Seq("\u0020\u0021\u0023", "abc")
    val df = data.toDF()

    // 1st usage: works in 1.6
    // Let parser parse pattern string
    val rlike1 = df.filter("value rlike '^\\x20[\\x20-\\x23]+$'")

    // 2nd usage: works in 1.6, 2.x
    // Call Column.rlike so the pattern string is a literal which doesn't go through parser
    val rlike2 = df.filter($"value".rlike("^\\x20[\\x20-\\x23]+$"))

    // In 2.x, we need to add backslashes to make the regex pattern parse correctly
    val rlike3 = df.filter("value rlike '^\\\\x20[\\\\x20-\\\\x23]+$'")

Following the discussion in #17736, this patch adds a config to fall back to 1.6 string literal parsing and mitigate the migration issue.

## How was this patch tested?

Jenkins tests.

Please review http://spark.apache.org/contributing.html before opening a pull request.

Author: Liang-Chi Hsieh

Closes #17887 from viirya/add-config-fallback-string-parsing.

(cherry picked from commit 609ba5f2b9fd89b1b9971d08f7cc680d202dbc7c)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3d1908fd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3d1908fd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3d1908fd
Branch: refs/heads/branch-2.2
Commit: 3d1908fd58fd9b1970cbffebdb731bfe4c776ad9
Parents: 5844151
Author: Liang-Chi Hsieh
Authored: Fri May 12 11:15:10 2017 +0800
Committer: Wenchen Fan
Committed: Fri May 12 11:15:26 2017 +0800

--
 .../sql/catalyst/catalog/SessionCatalog.scala   |   2 +-
 .../expressions/regexpExpressions.scala         |  33 -
 .../spark/sql/catalyst/parser/AstBuilder.scala  |  11 +-
 .../spark/sql/catalyst/parser/ParseDriver.scala |   8 +-
 .../spark/sql/catalyst/parser/ParserUtils.scala |   6 +
 .../org/apache/spark/sql/internal/SQLConf.scala |  10 ++
 .../catalyst/parser/ExpressionParserSuite.scala | 128 +--
 .../spark/sql/execution/SparkSqlParser.scala    |   2 +-
 .../org/apache/spark/sql/DatasetSuite.scala     |  13 ++
 9 files changed, 171 insertions(+), 42 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/3d1908fd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
index 18e5146..f6653d3 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/catalog/SessionCatalog.scala
@@ -73,7 +73,7 @@ class SessionCatalog(
       functionRegistry,
       conf,
       new Configuration(),
-      CatalystSqlParser,
+      new CatalystSqlParser(conf),
       DummyFunctionResourceLoader)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/3d1908fd/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
index 3fa8458..aa5a1b5 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/regexpExpressions.scala
@@ -86,6 +86,13 @@ abstract class StringRegexExpression extends BinaryExpression
     escape character, the following character is matched literally. It is invalid to escape
     any other character.
+
+    Since Spark 2.0, string literals are unescaped in our SQL parser. For example, in order
+    to match "\abc", the pattern should be "\\abc".
+
+    When SQL config 'spark.sql.parser.escapedStringLiterals' is enabled, it fallbacks
+    to Spark 1.6 behavior regarding string literal parsing. For example, if the config is
+    enabled, the pattern to match "\abc" should be "\abc".
+
     Examples:
       > SELECT '%SystemDrive%\Users\John' _FUNC_ '\%SystemDrive\%\\Users%'
       true
@@
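A usage sketch built from the reproduction in the commit message (session setup is assumed; the config name comes from the patch itself):

```
import spark.implicits._

// Enable the fallback before any affected SQL strings are parsed.
spark.conf.set("spark.sql.parser.escapedStringLiterals", "true")

val df = Seq("\u0020\u0021\u0023", "abc").toDF("value")

// With the fallback enabled, this 1.6-era pattern matches the first row
// again; with the default (false), the parser unescapes the literal first,
// so the pattern would need its backslashes doubled.
df.filter("value rlike '^\\x20[\\x20-\\x23]+$'").show()
```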
spark git commit: [SPARK-20431][SQL] Specify a schema by using a DDL-formatted string
Repository: spark
Updated Branches: refs/heads/master 7144b5180 -> 04901dd03

[SPARK-20431][SQL] Specify a schema by using a DDL-formatted string

## What changes were proposed in this pull request?

This PR supports a DDL-formatted string in `DataFrameReader.schema`, which lets users easily define a schema without importing `o.a.spark.sql.types._`.

## How was this patch tested?

Added tests in `DataFrameReaderWriterSuite`.

Author: Takeshi Yamamuro

Closes #17719 from maropu/SPARK-20431.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/04901dd0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/04901dd0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/04901dd0
Branch: refs/heads/master
Commit: 04901dd03a3f8062fd39ea38d585935ff71a9248
Parents: 7144b51
Author: Takeshi Yamamuro
Authored: Thu May 11 11:06:29 2017 -0700
Committer: Xiao Li
Committed: Thu May 11 11:06:29 2017 -0700

--
 python/pyspark/sql/readwriter.py                | 23 +---
 .../org/apache/spark/sql/DataFrameReader.scala  | 12 ++
 .../sql/test/DataFrameReaderWriterSuite.scala   |  9
 3 files changed, 36 insertions(+), 8 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/04901dd0/python/pyspark/sql/readwriter.py
--

diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 61a6b76..5cf719b 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -96,14 +96,18 @@ class DataFrameReader(OptionUtils):
         By specifying the schema here, the underlying data source can skip the schema
         inference step, and thus speed up data loading.

-        :param schema: a :class:`pyspark.sql.types.StructType` object
+        :param schema: a :class:`pyspark.sql.types.StructType` object or a DDL-formatted string
+            (For example ``col0 INT, col1 DOUBLE``).
         """
         from pyspark.sql import SparkSession
-        if not isinstance(schema, StructType):
-            raise TypeError("schema should be StructType")
         spark = SparkSession.builder.getOrCreate()
-        jschema = spark._jsparkSession.parseDataType(schema.json())
-        self._jreader = self._jreader.schema(jschema)
+        if isinstance(schema, StructType):
+            jschema = spark._jsparkSession.parseDataType(schema.json())
+            self._jreader = self._jreader.schema(jschema)
+        elif isinstance(schema, basestring):
+            self._jreader = self._jreader.schema(schema)
+        else:
+            raise TypeError("schema should be StructType or string")
         return self

     @since(1.5)
@@ -137,7 +141,8 @@ class DataFrameReader(OptionUtils):

         :param path: optional string or a list of string for file-system backed data sources.
         :param format: optional string for format of the data source. Default to 'parquet'.
-        :param schema: optional :class:`pyspark.sql.types.StructType` for the input schema.
+        :param schema: optional :class:`pyspark.sql.types.StructType` for the input schema
+            or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
         :param options: all other string options

         >>> df = spark.read.load('python/test_support/sql/parquet_partitioned', opt1=True,
@@ -181,7 +186,8 @@ class DataFrameReader(OptionUtils):

         :param path: string represents path to the JSON dataset, or a list of paths,
             or RDD of Strings storing JSON objects.
-        :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema.
+        :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema or
+            a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
         :param primitivesAsString: infers all primitive values as a string type.
             If None is set, it uses the default value, ``false``.
         :param prefersDecimal: infers all floating-point values as a decimal type. If the values
@@ -324,7 +330,8 @@ class DataFrameReader(OptionUtils):
            ``inferSchema`` option or specify the schema explicitly using ``schema``.

         :param path: string, or list of strings, for input path(s).
-        :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema.
+        :param schema: an optional :class:`pyspark.sql.types.StructType` for the input schema
+            or a DDL-formatted string (For example ``col0 INT, col1 DOUBLE``).
         :param sep: sets the single character as a separator for each
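For reference, the equivalent Scala usage added by this patch (the input path is a placeholder):

```
// Instead of importing org.apache.spark.sql.types._ and building
//   StructType(Seq(StructField("col0", IntegerType), StructField("col1", DoubleType)))
// the reader now accepts the equivalent DDL-formatted string:
val df = spark.read
  .schema("col0 INT, col1 DOUBLE")
  .csv("/tmp/example.csv")  // hypothetical input path

df.printSchema()
// root
//  |-- col0: integer (nullable = true)
//  |-- col1: double (nullable = true)
```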
spark git commit: [SPARK-20600][SS] KafkaRelation should be pretty printed in web UI
Repository: spark
Updated Branches: refs/heads/master 3aa4e464a -> 7144b5180

[SPARK-20600][SS] KafkaRelation should be pretty printed in web UI

## What changes were proposed in this pull request?

User-friendly name of `KafkaRelation` in web UI (under Details for Query).

### Before

https://cloud.githubusercontent.com/assets/62313/25841955/74479ac6-34a2-11e7-87fb-d9f62a1356a7.png

### After

https://cloud.githubusercontent.com/assets/62313/25841829/f5335630-34a1-11e7-85a4-afe9b66d73c8.png

## How was this patch tested?

Local build

```
./bin/spark-shell --jars ~/.m2/repository/org/apache/spark/spark-sql-kafka-0-10_2.11/2.3.0-SNAPSHOT/spark-sql-kafka-0-10_2.11-2.3.0-SNAPSHOT.jar --packages org.apache.kafka:kafka-clients:0.10.0.1
```

Author: Jacek Laskowski

Closes #17917 from jaceklaskowski/SPARK-20600-KafkaRelation-webUI.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7144b518
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7144b518
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7144b518
Branch: refs/heads/master
Commit: 7144b51809aa99ac076786c369389e2330142beb
Parents: 3aa4e46
Author: Jacek Laskowski
Authored: Thu May 11 10:55:11 2017 -0700
Committer: Shixiong Zhu
Committed: Thu May 11 10:55:11 2017 -0700

--
 .../main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala | 3 +++
 1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7144b518/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
--

diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
index 97bd283..7103709 100644
--- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
+++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
@@ -143,4 +143,7 @@ private[kafka010] class KafkaRelation(
       validateTopicPartitions(partitions, partitionOffsets)
     }
   }
+
+  override def toString: String =
+    s"KafkaRelation(strategy=$strategy, start=$startingOffsets, end=$endingOffsets)"
 }

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-20600][SS] KafkaRelation should be pretty printed in web UI
Repository: spark
Updated Branches: refs/heads/branch-2.2 dd9e3b2c9 -> 5844151bc

[SPARK-20600][SS] KafkaRelation should be pretty printed in web UI

## What changes were proposed in this pull request?

User-friendly name of `KafkaRelation` in web UI (under Details for Query).

### Before

https://cloud.githubusercontent.com/assets/62313/25841955/74479ac6-34a2-11e7-87fb-d9f62a1356a7.png

### After

https://cloud.githubusercontent.com/assets/62313/25841829/f5335630-34a1-11e7-85a4-afe9b66d73c8.png

## How was this patch tested?

Local build

```
./bin/spark-shell --jars ~/.m2/repository/org/apache/spark/spark-sql-kafka-0-10_2.11/2.3.0-SNAPSHOT/spark-sql-kafka-0-10_2.11-2.3.0-SNAPSHOT.jar --packages org.apache.kafka:kafka-clients:0.10.0.1
```

Author: Jacek Laskowski

Closes #17917 from jaceklaskowski/SPARK-20600-KafkaRelation-webUI.

(cherry picked from commit 7144b51809aa99ac076786c369389e2330142beb)
Signed-off-by: Shixiong Zhu

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5844151b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5844151b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5844151b
Branch: refs/heads/branch-2.2
Commit: 5844151bc8e410e7d5b48990bfc9d3c55926f56f
Parents: dd9e3b2
Author: Jacek Laskowski
Authored: Thu May 11 10:55:11 2017 -0700
Committer: Shixiong Zhu
Committed: Thu May 11 10:55:31 2017 -0700

--
 .../main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala | 3 +++
 1 file changed, 3 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/5844151b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
--

diff --git a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
index 97bd283..7103709 100644
--- a/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
+++ b/external/kafka-0-10-sql/src/main/scala/org/apache/spark/sql/kafka010/KafkaRelation.scala
@@ -143,4 +143,7 @@ private[kafka010] class KafkaRelation(
       validateTopicPartitions(partitions, partitionOffsets)
     }
   }
+
+  override def toString: String =
+    s"KafkaRelation(strategy=$strategy, start=$startingOffsets, end=$endingOffsets)"
 }

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
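To see the friendlier name, a hedged sketch of a Kafka batch read (broker address and topic are placeholders, and the exact rendering of the offset limits may differ):

```
val df = spark.read
  .format("kafka")
  .option("kafka.bootstrap.servers", "localhost:9092")  // placeholder broker
  .option("subscribe", "topic1")                        // placeholder topic
  .load()

// The Details for Query page (and explain()) now render the relation as
//   KafkaRelation(strategy=..., start=..., end=...)
// via the new toString, instead of an opaque auto-generated object name.
df.explain()
```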
spark git commit: [SPARK-20416][SQL] Print UDF names in EXPLAIN
Repository: spark
Updated Branches: refs/heads/master 8c67aa7f0 -> 3aa4e464a

[SPARK-20416][SQL] Print UDF names in EXPLAIN

## What changes were proposed in this pull request?

This PR adds `withName` to `UserDefinedFunction` for printing UDF names in EXPLAIN.

## How was this patch tested?

Added tests in `UDFSuite`.

Author: Takeshi Yamamuro

Closes #17712 from maropu/SPARK-20416.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3aa4e464
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3aa4e464
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3aa4e464
Branch: refs/heads/master
Commit: 3aa4e464a8c81994c6b7f76d445640da719af6ed
Parents: 8c67aa7
Author: Takeshi Yamamuro
Authored: Thu May 11 09:49:05 2017 -0700
Committer: Xiao Li
Committed: Thu May 11 09:49:05 2017 -0700

--
 .../apache/spark/ml/feature/Bucketizer.scala    |  2 +-
 .../org/apache/spark/sql/UDFRegistration.scala  | 50 ++--
 .../sql/expressions/UserDefinedFunction.scala   | 13 +
 .../scala/org/apache/spark/sql/UDFSuite.scala   | 12 +++--
 4 files changed, 46 insertions(+), 31 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/3aa4e464/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
--

diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
index bb8f2a3..46b512f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala
@@ -114,7 +114,7 @@ final class Bucketizer @Since("1.4.0") (@Since("1.4.0") override val uid: String

     val bucketizer: UserDefinedFunction = udf { (feature: Double) =>
       Bucketizer.binarySearchForBuckets($(splits), feature, keepInvalid)
-    }
+    }.withName("bucketizer")

     val newCol = bucketizer(filteredDataset($(inputCol)).cast(DoubleType))
     val newField = prepOutputField(filteredDataset.schema)

http://git-wip-us.apache.org/repos/asf/spark/blob/3aa4e464/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
--

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
index 5fd7123..1bceac4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala
@@ -32,7 +32,7 @@ import org.apache.spark.sql.catalyst.expressions.{Expression, ScalaUDF}
 import org.apache.spark.sql.execution.aggregate.ScalaUDAF
 import org.apache.spark.sql.execution.python.UserDefinedPythonFunction
 import org.apache.spark.sql.expressions.{UserDefinedAggregateFunction, UserDefinedFunction}
-import org.apache.spark.sql.types.{DataType, DataTypes}
+import org.apache.spark.sql.types.DataType
 import org.apache.spark.util.Utils

 /**
@@ -114,7 +114,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends
       val inputTypes = Try($inputTypes).toOption
       def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil), Some(name), nullable)
       functionRegistry.registerFunction(name, builder)
-      UserDefinedFunction(func, dataType, inputTypes).withNullability(nullable)
+      UserDefinedFunction(func, dataType, inputTypes).withName(name).withNullability(nullable)
     }""")
   }

@@ -147,7 +147,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends
     val inputTypes = Try(Nil).toOption
     def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e,
       inputTypes.getOrElse(Nil), Some(name), nullable)
     functionRegistry.registerFunction(name, builder)
-    UserDefinedFunction(func, dataType, inputTypes).withNullability(nullable)
+    UserDefinedFunction(func, dataType, inputTypes).withName(name).withNullability(nullable)
   }

 /**
@@ -160,7 +160,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry) extends
     val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: Nil).toOption
     def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil), Some(name), nullable)
     functionRegistry.registerFunction(name, builder)
-    UserDefinedFunction(func, dataType, inputTypes).withNullability(nullable)
+    UserDefinedFunction(func, dataType, inputTypes).withName(name).withNullability(nullable)
   }

 /**
@@ -173,7 +173,7 @@ class UDFRegistration private[sql] (functionRegistry: FunctionRegistry)
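A short sketch of the effect (function names are illustrative, and the exact plan rendering depends on the plan printer):

```
// A registered UDF now carries its name into the physical plan:
spark.udf.register("plusOne", (x: Int) => x + 1)

// EXPLAIN prints the function as plusOne(...) rather than an anonymous UDF(...).
spark.sql("SELECT plusOne(CAST(id AS INT)) FROM range(3)").explain()

// For UDFs used directly through the DataFrame API, the new withName
// serves the same purpose:
import org.apache.spark.sql.functions.udf
val doubled = udf((x: Int) => x * 2).withName("doubled")
```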
spark git commit: [SPARK-20311][SQL] Support aliases for table value functions
Repository: spark
Updated Branches: refs/heads/master b4c99f436 -> 8c67aa7f0

[SPARK-20311][SQL] Support aliases for table value functions

## What changes were proposed in this pull request?

This PR adds parsing rules to support aliases for table value functions. The previous PR (#17666) was reverted because of a regression. This new PR fixes the regression and adds tests in `SQLQueryTestSuite`.

## How was this patch tested?

Added tests in `PlanParserSuite` and `SQLQueryTestSuite`.

Author: Takeshi Yamamuro

Closes #17928 from maropu/SPARK-20311-3.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8c67aa7f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8c67aa7f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8c67aa7f
Branch: refs/heads/master
Commit: 8c67aa7f00e0186abe05a1628faf2232b364a61f
Parents: b4c99f4
Author: Takeshi Yamamuro
Authored: Thu May 11 18:09:31 2017 +0800
Committer: Wenchen Fan
Committed: Thu May 11 18:09:31 2017 +0800

--
 .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 20 
 .../analysis/ResolveTableValuedFunctions.scala  | 22 --
 .../sql/catalyst/analysis/unresolved.scala      | 10 --
 .../spark/sql/catalyst/parser/AstBuilder.scala  | 17 ---
 .../sql/catalyst/analysis/AnalysisSuite.scala   | 14 -
 .../sql/catalyst/parser/PlanParserSuite.scala   | 13 +++-
 .../resources/sql-tests/inputs/inline-table.sql |  3 ++
 .../sql-tests/inputs/table-valued-functions.sql |  3 ++
 .../sql-tests/results/inline-table.sql.out      | 32 +++-
 .../results/table-valued-functions.sql.out      | 32 +++-
 10 files changed, 147 insertions(+), 19 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/8c67aa7f/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
--

diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
index 14c511f..ed5450b 100644
--- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
+++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4
@@ -472,15 +472,23 @@ identifierComment
     ;

 relationPrimary
-    : tableIdentifier sample? (AS? strictIdentifier)?      #tableName
-    | '(' queryNoWith ')' sample? (AS? strictIdentifier)?  #aliasedQuery
-    | '(' relation ')' sample? (AS? strictIdentifier)?     #aliasedRelation
-    | inlineTable                                          #inlineTableDefault2
-    | identifier '(' (expression (',' expression)*)? ')'   #tableValuedFunction
+    : tableIdentifier sample? (AS? strictIdentifier)?      #tableName
+    | '(' queryNoWith ')' sample? (AS? strictIdentifier)?  #aliasedQuery
+    | '(' relation ')' sample? (AS? strictIdentifier)?     #aliasedRelation
+    | inlineTable                                          #inlineTableDefault2
+    | functionTable                                        #tableValuedFunction
     ;

 inlineTable
-    : VALUES expression (',' expression)* (AS? identifier identifierList?)?
+    : VALUES expression (',' expression)* tableAlias
+    ;
+
+functionTable
+    : identifier '(' (expression (',' expression)*)? ')' tableAlias
+    ;
+
+tableAlias
+    : (AS? strictIdentifier identifierList?)?
     ;

 rowFormat

http://git-wip-us.apache.org/repos/asf/spark/blob/8c67aa7f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
--

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
index de6de24..dad1340 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveTableValuedFunctions.scala
@@ -19,8 +19,8 @@ package org.apache.spark.sql.catalyst.analysis

 import java.util.Locale

-import org.apache.spark.sql.catalyst.expressions.Expression
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Range}
+import org.apache.spark.sql.catalyst.expressions.{Alias, Expression}
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, Range}
 import org.apache.spark.sql.catalyst.rules._
 import org.apache.spark.sql.types.{DataType, IntegerType, LongType}

@@ -105,7 +105,7 @@ object
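The newly supported syntax, sketched through `spark.sql` (queries are illustrative; `range` is Spark's built-in table-valued function):

```
// A plain table alias on a table-valued function:
spark.sql("SELECT t.id FROM range(3) AS t").show()

// A table alias with a column alias list, which the analyzer resolves by
// projecting the function's output onto the aliased names:
spark.sql("SELECT t.x FROM range(3) AS t(x)").show()
```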
svn commit: r19618 - in /release/spark: spark-1.6.2/ spark-2.0.1/ spark-2.1.0/
Author: srowen
Date: Thu May 11 10:08:00 2017
New Revision: 19618

Log:
Delete non-current Spark releases

Removed:
    release/spark/spark-1.6.2/
    release/spark/spark-2.0.1/
    release/spark/spark-2.1.0/

-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
spark-website git commit: More dead link fixing
Repository: spark-website
Updated Branches: refs/heads/asf-site 62cf4a16d -> 5ed41c8d8

More dead link fixing

Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/5ed41c8d
Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/5ed41c8d
Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/5ed41c8d
Branch: refs/heads/asf-site
Commit: 5ed41c8d8a6bbf03ce84f987ee9c57f6292e1aa6
Parents: 62cf4a1
Author: Sean Owen
Authored: Thu May 11 11:02:31 2017 +0100
Committer: Sean Owen
Committed: Thu May 11 11:02:31 2017 +0100

--
 faq.md                                          |  2 +-
 ...6-21-spark-accepted-into-apache-incubator.md |  2 +-
 news/_posts/2016-05-26-spark-2.0.0-preview.md   |  2 +-
 powered-by.md                                   |  9 ++---
 site/faq.html                                   |  2 +-
 site/js/downloads.js                            | 41
 site/news/index.html                            |  4 +-
 site/news/spark-2-1-1-released.html             |  1 +
 site/news/spark-2.0.0-preview.html              |  2 +-
 .../spark-accepted-into-apache-incubator.html   |  2 +-
 site/powered-by.html                            |  9 ++---
 11 files changed, 31 insertions(+), 45 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/faq.md
--

diff --git a/faq.md b/faq.md
index 281d7ca..614664c 100644
--- a/faq.md
+++ b/faq.md
@@ -71,4 +71,4 @@ Please also refer to our

 Where can I get more help?

-Please post on StackOverflow's <a href="https://stackoverflow.com/questions/tagged/apache-spark">apache-spark tag</a> or <a href="https://apache-spark-user-list.1001560.n3.nabble.com">Spark Users mailing list</a>. For more information, please refer to <a href="https://spark.apache.org/community.html#have-questions">Have Questions?</a>. We'll be glad to help!
+Please post on StackOverflow's <a href="https://stackoverflow.com/questions/tagged/apache-spark">apache-spark tag</a> or <a href="http://apache-spark-user-list.1001560.n3.nabble.com">Spark Users mailing list</a>. For more information, please refer to <a href="https://spark.apache.org/community.html#have-questions">Have Questions?</a>. We'll be glad to help!

http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md
--

diff --git a/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md b/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md
index 1a6ca6d..a0ff02a 100644
--- a/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md
+++ b/news/_posts/2013-06-21-spark-accepted-into-apache-incubator.md
@@ -11,4 +11,4 @@ meta:
   _edit_last: '4'
   _wpas_done_all: '1'
 ---
-Spark was recently <a href="http://mail-archives.apache.org/mod_mbox/incubator-general/201306.mbox/%3CCDE7B773.E9A48%25chris.a.mattmann%40jpl.nasa.gov%3E">accepted</a> into the <a href="http://incubator.apache.org">Apache Incubator</a>, which will serve as the long-term home for the project. While moving the source code and issue tracking to Apache will take some time, we are excited to be joining the community at Apache. Stay tuned on this site for updates on how the project hosting will change.
+Spark was recently <a href="http://mail-archives.apache.org/mod_mbox/incubator-general/201306.mbox/%3ccde7b773.e9a48%25chris.a.mattm...@jpl.nasa.gov%3E">accepted</a> into the <a href="http://incubator.apache.org">Apache Incubator</a>, which will serve as the long-term home for the project. While moving the source code and issue tracking to Apache will take some time, we are excited to be joining the community at Apache. Stay tuned on this site for updates on how the project hosting will change.

http://git-wip-us.apache.org/repos/asf/spark-website/blob/5ed41c8d/news/_posts/2016-05-26-spark-2.0.0-preview.md
--

diff --git a/news/_posts/2016-05-26-spark-2.0.0-preview.md b/news/_posts/2016-05-26-spark-2.0.0-preview.md
index 053beb6..fb642f7 100644
--- a/news/_posts/2016-05-26-spark-2.0.0-preview.md
+++ b/news/_posts/2016-05-26-spark-2.0.0-preview.md
@@ -11,6 +11,6 @@ meta:
   _edit_last: '4'
   _wpas_done_all: '1'
 ---
-To enable wide-scale community testing of the upcoming Spark 2.0 release, the Apache Spark team has posted a <a href="https://dist.apache.org/repos/dist/release/spark/spark-2.0.0-preview/">preview release of Spark 2.0</a>. This preview is not a stable release in terms of either API or functionality, but it is meant to give the community early access to try the code that will become Spark 2.0. If you would like to test the release, simply download it, and send feedback using either the
[1/2] spark-website git commit: Replace most http links with https as a best practice, where possible
Repository: spark-website Updated Branches: refs/heads/asf-site c2c0905b4 -> 62cf4a16d http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/site/sitemap.xml -- diff --git a/site/sitemap.xml b/site/sitemap.xml index bc93fb7..eb4e705 100644 --- a/site/sitemap.xml +++ b/site/sitemap.xml @@ -6,698 +6,698 @@ http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd;> - http://spark.apache.org/ + https://spark.apache.org/ daily 1.0 - http://spark.apache.org/docs/latest/index.html + https://spark.apache.org/docs/latest/index.html daily 1.0 - http://spark.apache.org/docs/latest/quick-start.html + https://spark.apache.org/docs/latest/quick-start.html daily 1.0 - http://spark.apache.org/docs/latest/programming-guide.html + https://spark.apache.org/docs/latest/programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/streaming-programming-guide.html + https://spark.apache.org/docs/latest/streaming-programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/sql-programming-guide.html + https://spark.apache.org/docs/latest/sql-programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/structured-streaming-programming-guide.html + https://spark.apache.org/docs/latest/structured-streaming-programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/ml-guide.html + https://spark.apache.org/docs/latest/ml-guide.html daily 1.0 - http://spark.apache.org/docs/latest/graphx-programming-guide.html + https://spark.apache.org/docs/latest/graphx-programming-guide.html daily 1.0 - http://spark.apache.org/docs/latest/sparkr.html + https://spark.apache.org/docs/latest/sparkr.html daily 1.0 - http://spark.apache.org/docs/latest/cluster-overview.html + https://spark.apache.org/docs/latest/cluster-overview.html daily 1.0 - http://spark.apache.org/docs/latest/submitting-applications.html + https://spark.apache.org/docs/latest/submitting-applications.html daily 1.0 - http://spark.apache.org/docs/latest/spark-standalone.html + https://spark.apache.org/docs/latest/spark-standalone.html daily 1.0 - http://spark.apache.org/docs/latest/running-on-mesos.html + https://spark.apache.org/docs/latest/running-on-mesos.html daily 1.0 - http://spark.apache.org/docs/latest/running-on-yarn.html + https://spark.apache.org/docs/latest/running-on-yarn.html daily 1.0 - http://spark.apache.org/docs/latest/configuration.html + https://spark.apache.org/docs/latest/configuration.html daily 1.0 - http://spark.apache.org/docs/latest/monitoring.html + https://spark.apache.org/docs/latest/monitoring.html daily 1.0 - http://spark.apache.org/docs/latest/tuning.html + https://spark.apache.org/docs/latest/tuning.html daily 1.0 - http://spark.apache.org/docs/latest/job-scheduling.html + https://spark.apache.org/docs/latest/job-scheduling.html daily 1.0 - http://spark.apache.org/docs/latest/security.html + https://spark.apache.org/docs/latest/security.html daily 1.0 - http://spark.apache.org/docs/latest/hardware-provisioning.html + https://spark.apache.org/docs/latest/hardware-provisioning.html daily 1.0 - http://spark.apache.org/docs/latest/building-spark.html + https://spark.apache.org/docs/latest/building-spark.html daily 1.0 - http://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package + https://spark.apache.org/docs/latest/api/scala/index.html#org.apache.spark.package daily 1.0 - http://spark.apache.org/docs/latest/api/java/index.html + https://spark.apache.org/docs/latest/api/java/index.html weekly 1.0 - http://spark.apache.org/docs/latest/api/python/index.html + 
https://spark.apache.org/docs/latest/api/python/index.html weekly 1.0 - http://spark.apache.org/docs/latest/api/R/index.html + https://spark.apache.org/docs/latest/api/R/index.html weekly 1.0 - http://spark.apache.org/releases/spark-release-2-1-1.html + https://spark.apache.org/releases/spark-release-2-1-1.html weekly - http://spark.apache.org/news/spark-2-1-1-released.html + https://spark.apache.org/news/spark-2-1-1-released.html weekly - http://spark.apache.org/news/spark-summit-june-2017-agenda-posted.html + https://spark.apache.org/news/spark-summit-june-2017-agenda-posted.html weekly - http://spark.apache.org/news/spark-summit-east-2017-agenda-posted.html + https://spark.apache.org/news/spark-summit-east-2017-agenda-posted.html weekly - http://spark.apache.org/releases/spark-release-2-1-0.html + https://spark.apache.org/releases/spark-release-2-1-0.html weekly - http://spark.apache.org/news/spark-2-1-0-released.html +
[2/2] spark-website git commit: Replace most http links with https as a best practice, where possible
Replace most http links with https as a best practice, where possible Project: http://git-wip-us.apache.org/repos/asf/spark-website/repo Commit: http://git-wip-us.apache.org/repos/asf/spark-website/commit/62cf4a16 Tree: http://git-wip-us.apache.org/repos/asf/spark-website/tree/62cf4a16 Diff: http://git-wip-us.apache.org/repos/asf/spark-website/diff/62cf4a16 Branch: refs/heads/asf-site Commit: 62cf4a16daae3cf1b68745b8f676dbb29c167af2 Parents: c2c0905 Author: Sean Owen Authored: Wed May 10 10:56:35 2017 +0100 Committer: Sean Owen Committed: Wed May 10 19:02:39 2017 +0100 -- _config.yml| 2 +- community.md | 8 +- contributing.md| 10 +- developer-tools.md | 8 +- documentation.md | 40 ++--- downloads.md | 4 +- examples.md| 10 +- faq.md | 6 +- index.md | 12 +- mllib/index.md | 4 +- powered-by.md | 12 +- release-process.md | 6 +- robots.txt | 2 +- site/community.html| 8 +- site/contributing.html | 10 +- site/developer-tools.html | 8 +- site/documentation.html| 40 ++--- site/downloads.html| 4 +- site/examples.html | 10 +- site/faq.html | 6 +- site/index.html| 12 +- site/mailing-lists.html| 2 +- site/mllib/index.html | 4 +- site/powered-by.html | 15 +- site/release-process.html | 6 +- site/robots.txt| 2 +- site/sitemap.xml | 332 ++-- site/streaming/index.html | 8 +- site/third-party-projects.html | 8 +- site/trademarks.html | 2 +- sitemap.xml| 52 +++--- streaming/index.md | 8 +- third-party-projects.md| 8 +- trademarks.md | 2 +- 34 files changed, 332 insertions(+), 339 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/_config.yml -- diff --git a/_config.yml b/_config.yml index 18ba30f..9a3934e 100644 --- a/_config.yml +++ b/_config.yml @@ -6,4 +6,4 @@ permalink: none destination: site exclude: ['README.md','content'] keep_files: ['docs'] -url: http://spark.apache.org \ No newline at end of file +url: https://spark.apache.org \ No newline at end of file http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/community.md -- diff --git a/community.md b/community.md index 9fcb2b5..9fc6136 100644 --- a/community.md +++ b/community.md @@ -15,18 +15,18 @@ navigation: StackOverflow For usage questions and help (e.g. how to use this Spark API), it is recommended you use the -StackOverflow tag http://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark` +StackOverflow tag https://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark` as it is an active forum for Spark users' questions and answers. Some quick tips when using StackOverflow: - Prior to asking submitting questions, please: - Search StackOverflow's - http://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark` tag to see if + https://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark` tag to see if your question has already been answered - Search the nabble archive for http://apache-spark-user-list.1001560.n3.nabble.com/;>us...@spark.apache.org -- Please follow the StackOverflow http://stackoverflow.com/help/how-to-ask;>code of conduct +- Please follow the StackOverflow https://stackoverflow.com/help/how-to-ask;>code of conduct - Always use the `apache-spark` tag when asking questions - Please also use a secondary tag to specify components so subject matter experts can more easily find them.
Examples include: `pyspark`, `spark-dataframe`, `spark-streaming`, `spark-r`, `spark-mllib`, @@ -58,7 +58,7 @@ project, and scenarios, it is recommended you use the u...@spark.apache.org mail Some quick tips when using email: - Prior to asking submitting questions, please: - - Search StackOverflow at http://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark` + - Search StackOverflow at https://stackoverflow.com/questions/tagged/apache-spark;>`apache-spark` to see if your question has already been answered - Search the nabble archive for http://apache-spark-user-list.1001560.n3.nabble.com/;>us...@spark.apache.org http://git-wip-us.apache.org/repos/asf/spark-website/blob/62cf4a16/contributing.md
spark git commit: [SPARK-20569][SQL] RuntimeReplaceable functions should not take extra parameters
Repository: spark Updated Branches: refs/heads/branch-2.2 80a57fa90 -> dd9e3b2c9 [SPARK-20569][SQL] RuntimeReplaceable functions should not take extra parameters ## What changes were proposed in this pull request? `RuntimeReplaceable` always has a constructor with the expression to replace with, and this constructor should not be the function builder. ## How was this patch tested? New regression test. Author: Wenchen Fan Closes #17876 from cloud-fan/minor. (cherry picked from commit b4c99f43690f8cfba414af90fa2b3998a510bba8) Signed-off-by: Xiao Li Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dd9e3b2c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dd9e3b2c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dd9e3b2c Branch: refs/heads/branch-2.2 Commit: dd9e3b2c976a4ef3b4837590a2ba0954cf73860d Parents: 80a57fa Author: Wenchen Fan Authored: Thu May 11 00:41:15 2017 -0700 Committer: Xiao Li Committed: Thu May 11 00:41:35 2017 -0700 -- .../catalyst/analysis/FunctionRegistry.scala| 20 ++-- .../org/apache/spark/sql/SQLQuerySuite.scala| 5 + 2 files changed, 19 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dd9e3b2c/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e1d83a8..6fc154f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.analysis +import java.lang.reflect.Modifier + import scala.language.existentials import scala.reflect.ClassTag import scala.util.{Failure, Success, Try} @@ -455,8 +457,17 @@ object FunctionRegistry { private def expression[T <: Expression](name: String) (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = { +// For `RuntimeReplaceable`, skip the constructor with most arguments, which is the main +// constructor and contains non-parameter `child` and should not be used as function builder. +val constructors = if (classOf[RuntimeReplaceable].isAssignableFrom(tag.runtimeClass)) { + val all = tag.runtimeClass.getConstructors + val maxNumArgs = all.map(_.getParameterCount).max + all.filterNot(_.getParameterCount == maxNumArgs) +} else { + tag.runtimeClass.getConstructors +} // See if we can find a constructor that accepts Seq[Expression] -val varargCtor = Try(tag.runtimeClass.getDeclaredConstructor(classOf[Seq[_]])).toOption +val varargCtor = constructors.find(_.getParameterTypes.toSeq == Seq(classOf[Seq[_]])) val builder = (expressions: Seq[Expression]) => { if (varargCtor.isDefined) { // If there is an apply method that accepts Seq[Expression], use that one. @@ -470,11 +481,8 @@ object FunctionRegistry { } else { // Otherwise, find a constructor method that matches the number of arguments, and use that.
val params = Seq.fill(expressions.size)(classOf[Expression]) -val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match { - case Success(e) => -e - case Failure(e) => -throw new AnalysisException(s"Invalid number of arguments for function $name") +val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse { + throw new AnalysisException(s"Invalid number of arguments for function $name") } Try(f.newInstance(expressions : _*).asInstanceOf[Expression]) match { case Success(e) => e http://git-wip-us.apache.org/repos/asf/spark/blob/dd9e3b2c/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 3ecbf96..cd14d24 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2619,4 +2619,9 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { new URL(jarFromInvalidFs) } } + + test("RuntimeReplaceable functions should not take extra parameters") { +val e = intercept[AnalysisException](sql("SELECT nvl(1, 2, 3)")) +assert(e.message.contains("Invalid number of arguments")) + } }
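To see the selection rule in isolation, here is a self-contained Scala REPL sketch of the same idea; `Replaceable` and `Nvl2` below are hypothetical stand-ins, not Spark classes. As in the patch, the widest constructor of a marker-typed class is dropped before matching by arity (a length guard is added here for classes that have only one constructor):

    import scala.reflect.{classTag, ClassTag}

    trait Replaceable  // hypothetical stand-in for Spark's RuntimeReplaceable marker

    class Nvl2(left: String, right: String) extends Replaceable {
      // The widest constructor carries the internal `child`-style argument and
      // should not be offered to users as a function builder.
      def this(left: String, right: String, child: String) = this(left, right)
    }

    def builderConstructors[T: ClassTag]: Array[java.lang.reflect.Constructor[_]] = {
      val cls = classTag[T].runtimeClass
      val all = cls.getConstructors
      if (classOf[Replaceable].isAssignableFrom(cls) && all.length > 1) {
        val maxNumArgs = all.map(_.getParameterCount).max
        all.filterNot(_.getParameterCount == maxNumArgs)  // drop the main constructor
      } else {
        all
      }
    }

    // Only the two-argument builder survives, so a three-argument call cannot match.
    assert(builderConstructors[Nvl2].forall(_.getParameterCount == 2))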
spark git commit: [SPARK-20569][SQL] RuntimeReplaceable functions should not take extra parameters
Repository: spark Updated Branches: refs/heads/master 65accb813 -> b4c99f436 [SPARK-20569][SQL] RuntimeReplaceable functions should not take extra parameters ## What changes were proposed in this pull request? `RuntimeReplaceable` always has a constructor with the expression to replace with, and this constructor should not be the function builder. ## How was this patch tested? New regression test. Author: Wenchen Fan Closes #17876 from cloud-fan/minor. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b4c99f43 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b4c99f43 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b4c99f43 Branch: refs/heads/master Commit: b4c99f43690f8cfba414af90fa2b3998a510bba8 Parents: 65accb8 Author: Wenchen Fan Authored: Thu May 11 00:41:15 2017 -0700 Committer: Xiao Li Committed: Thu May 11 00:41:15 2017 -0700 -- .../catalyst/analysis/FunctionRegistry.scala| 20 ++-- .../org/apache/spark/sql/SQLQuerySuite.scala| 5 + 2 files changed, 19 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/b4c99f43/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index e1d83a8..6fc154f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -17,6 +17,8 @@ package org.apache.spark.sql.catalyst.analysis +import java.lang.reflect.Modifier + import scala.language.existentials import scala.reflect.ClassTag import scala.util.{Failure, Success, Try} @@ -455,8 +457,17 @@ object FunctionRegistry { private def expression[T <: Expression](name: String) (implicit tag: ClassTag[T]): (String, (ExpressionInfo, FunctionBuilder)) = { +// For `RuntimeReplaceable`, skip the constructor with most arguments, which is the main +// constructor and contains non-parameter `child` and should not be used as function builder. +val constructors = if (classOf[RuntimeReplaceable].isAssignableFrom(tag.runtimeClass)) { + val all = tag.runtimeClass.getConstructors + val maxNumArgs = all.map(_.getParameterCount).max + all.filterNot(_.getParameterCount == maxNumArgs) +} else { + tag.runtimeClass.getConstructors +} // See if we can find a constructor that accepts Seq[Expression] -val varargCtor = Try(tag.runtimeClass.getDeclaredConstructor(classOf[Seq[_]])).toOption +val varargCtor = constructors.find(_.getParameterTypes.toSeq == Seq(classOf[Seq[_]])) val builder = (expressions: Seq[Expression]) => { if (varargCtor.isDefined) { // If there is an apply method that accepts Seq[Expression], use that one. @@ -470,11 +481,8 @@ object FunctionRegistry { } else { // Otherwise, find a constructor method that matches the number of arguments, and use that.
val params = Seq.fill(expressions.size)(classOf[Expression]) -val f = Try(tag.runtimeClass.getDeclaredConstructor(params : _*)) match { - case Success(e) => -e - case Failure(e) => -throw new AnalysisException(s"Invalid number of arguments for function $name") +val f = constructors.find(_.getParameterTypes.toSeq == params).getOrElse { + throw new AnalysisException(s"Invalid number of arguments for function $name") } Try(f.newInstance(expressions : _*).asInstanceOf[Expression]) match { case Success(e) => e http://git-wip-us.apache.org/repos/asf/spark/blob/b4c99f43/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 3ecbf96..cd14d24 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2619,4 +2619,9 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { new URL(jarFromInvalidFs) } } + + test("RuntimeReplaceable functions should not take extra parameters") { +val e = intercept[AnalysisException](sql("SELECT nvl(1, 2, 3)")) +assert(e.message.contains("Invalid number of arguments")) + } }
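The restored behavior is easy to check from the SQL layer. Below is a minimal spark-shell style sketch (the local session setup is illustrative, not part of the patch): the two-argument `nvl` resolves through one of the remaining constructors, while the three-argument call finds no matching builder and is rejected during analysis, exactly as the regression test above asserts.

    import org.apache.spark.sql.{AnalysisException, SparkSession}

    val spark = SparkSession.builder().master("local[*]").appName("nvl-demo").getOrCreate()

    // nvl is a RuntimeReplaceable expression: its two-argument constructor acts
    // as the function builder, while the widest constructor (carrying the
    // rewritten `child`) is skipped by the registry logic above.
    spark.sql("SELECT nvl(NULL, 2)").show()  // resolves via the two-argument builder

    try {
      spark.sql("SELECT nvl(1, 2, 3)").collect()  // no three-argument builder exists
    } catch {
      case e: AnalysisException =>
        assert(e.message.contains("Invalid number of arguments"))
    }

    spark.stop()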
spark git commit: [SPARK-17029] make toJSON not go through rdd form but operate on dataset always
Repository: spark Updated Branches: refs/heads/master 0698e6c88 -> 65accb813 [SPARK-17029] make toJSON not go through rdd form but operate on dataset always ## What changes were proposed in this pull request? Don't convert toRdd when doing toJSON ## How was this patch tested? Existing unit tests Author: Robert Kruszewski Closes #14615 from robert3005/robertk/correct-tojson. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/65accb81 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/65accb81 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/65accb81 Branch: refs/heads/master Commit: 65accb813add9f58c1e9f1555863fe0bb1932ad8 Parents: 0698e6c Author: Robert Kruszewski Authored: Thu May 11 15:26:48 2017 +0800 Committer: Wenchen Fan Committed: Thu May 11 15:26:48 2017 +0800 -- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 8 +++- .../spark/sql/execution/datasources/json/JsonSuite.scala | 10 ++ 2 files changed, 13 insertions(+), 5 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/65accb81/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 61154e2..c75921e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2806,7 +2806,7 @@ class Dataset[T] private[sql]( def toJSON: Dataset[String] = { val rowSchema = this.schema val sessionLocalTimeZone = sparkSession.sessionState.conf.sessionLocalTimeZone -val rdd: RDD[String] = queryExecution.toRdd.mapPartitions { iter => +mapPartitions { iter => val writer = new CharArrayWriter() // create the Generator without separator inserted between 2 records val gen = new JacksonGenerator(rowSchema, writer, @@ -2815,7 +2815,7 @@ class Dataset[T] private[sql]( new Iterator[String] { override def hasNext: Boolean = iter.hasNext override def next(): String = { - gen.write(iter.next()) + gen.write(exprEnc.toRow(iter.next())) gen.flush() val json = writer.toString @@ -2828,9 +2828,7 @@ class Dataset[T] private[sql]( json } } -} -import sparkSession.implicits.newStringEncoder -sparkSession.createDataset(rdd) +} (Encoders.STRING) } /** http://git-wip-us.apache.org/repos/asf/spark/blob/65accb81/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala index 2ab0381..5e7f794 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/JsonSuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql.{functions => F, _} import org.apache.spark.sql.catalyst.json.{CreateJacksonParser, JacksonParser, JSONOptions} import org.apache.spark.sql.catalyst.util.DateTimeUtils +import org.apache.spark.sql.execution.ExternalRDD import org.apache.spark.sql.execution.datasources.DataSource import org.apache.spark.sql.execution.datasources.json.JsonInferSchema.compatibleType import org.apache.spark.sql.internal.SQLConf @@ -1326,6 +1327,15 @@ class JsonSuite extends QueryTest with SharedSQLContext with TestJsonData { ) } + test("Dataset toJSON doesn't construct rdd") { +val containsRDD =
spark.emptyDataFrame.toJSON.queryExecution.logical.find { + case ExternalRDD(_, _) => true + case _ => false +} + +assert(containsRDD.isEmpty, "Expected logical plan of toJSON to not contain an RDD") + } + test("JSONRelation equality test") { withTempPath(dir => { val path = dir.getCanonicalFile.toURI.toString
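A short spark-shell style sketch of what the change preserves (the local session and sample data are illustrative, not part of the patch): `toJSON` is now an ordinary Dataset transformation, so its result composes directly with further Dataset operations and, as the new test checks, its logical plan contains no ExternalRDD node.

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").appName("tojson-demo").getOrCreate()
    import spark.implicits._

    val people = Seq(("alice", 30), ("bob", 25)).toDF("name", "age")

    // toJSON is implemented as mapPartitions over the Dataset itself, so no
    // RDD detour is taken and the result is a first-class Dataset[String].
    val json = people.toJSON
    json.show(truncate = false)  // {"name":"alice","age":30} ...
    json.filter(_.contains("alice")).show(truncate = false)

    spark.stop()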
spark git commit: [SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for ML"
Repository: spark Updated Branches: refs/heads/branch-2.2 3eb0ee06a -> 80a57fa90 [SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for ML" This reverts commit b8733e0ad9f5a700f385e210450fd2c10137293e. Author: Yanbo Liang Closes #17944 from yanboliang/spark-20606-revert. (cherry picked from commit 0698e6c88ca11fdfd6e5498cab784cf6dbcdfacb) Signed-off-by: Yanbo Liang Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/80a57fa9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/80a57fa9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/80a57fa9 Branch: refs/heads/branch-2.2 Commit: 80a57fa90be8dca4340345c09b4ea28fbf11a516 Parents: 3eb0ee0 Author: Yanbo Liang Authored: Thu May 11 14:48:13 2017 +0800 Committer: Yanbo Liang Committed: Thu May 11 14:48:26 2017 +0800 -- .../classification/DecisionTreeClassifier.scala | 18 ++-- .../spark/ml/classification/GBTClassifier.scala | 24 ++--- .../classification/RandomForestClassifier.scala | 24 ++--- .../ml/regression/DecisionTreeRegressor.scala | 18 ++-- .../spark/ml/regression/GBTRegressor.scala | 24 ++--- .../ml/regression/RandomForestRegressor.scala | 24 ++--- .../org/apache/spark/ml/tree/treeParams.scala | 105 +++ .../org/apache/spark/ml/util/ReadWrite.scala| 16 +++ project/MimaExcludes.scala | 68 python/pyspark/ml/util.py | 32 ++ 10 files changed, 219 insertions(+), 134 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/80a57fa9/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index 5fb105c..9f60f08 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -54,27 +54,27 @@ class DecisionTreeClassifier @Since("1.4.0") ( /** @group setParam */ @Since("1.4.0") - def setMaxDepth(value: Int): this.type = set(maxDepth, value) + override def setMaxDepth(value: Int): this.type = set(maxDepth, value) /** @group setParam */ @Since("1.4.0") - def setMaxBins(value: Int): this.type = set(maxBins, value) + override def setMaxBins(value: Int): this.type = set(maxBins, value) /** @group setParam */ @Since("1.4.0") - def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value) + override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value) /** @group setParam */ @Since("1.4.0") - def setMinInfoGain(value: Double): this.type = set(minInfoGain, value) + override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value) /** @group expertSetParam */ @Since("1.4.0") - def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value) + override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value) /** @group expertSetParam */ @Since("1.4.0") - def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value) + override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value) /** * Specifies how often to checkpoint the cached node IDs.
@@ -86,15 +86,15 @@ class DecisionTreeClassifier @Since("1.4.0") ( * @group setParam */ @Since("1.4.0") - def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value) + override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value) /** @group setParam */ @Since("1.4.0") - def setImpurity(value: String): this.type = set(impurity, value) + override def setImpurity(value: String): this.type = set(impurity, value) /** @group setParam */ @Since("1.6.0") - def setSeed(value: Long): this.type = set(seed, value) + override def setSeed(value: Long): this.type = set(seed, value) override protected def train(dataset: Dataset[_]): DecisionTreeClassificationModel = { val categoricalFeatures: Map[Int, Int] = http://git-wip-us.apache.org/repos/asf/spark/blob/80a57fa9/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 263ed10..ade0960 100644 ---
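For context on what the revert keeps for users, here is a minimal spark-shell style sketch of the setter chain these `override` modifiers belong to (column names are illustrative): the concrete per-estimator setters stay on DecisionTreeClassifier rather than being collapsed into the shared tree-params traits, so code built against earlier 2.x releases keeps working.

    import org.apache.spark.ml.classification.DecisionTreeClassifier

    // Each call below goes through one of the restored `override def` setters.
    val dt = new DecisionTreeClassifier()
      .setLabelCol("label")
      .setFeaturesCol("features")
      .setMaxDepth(5)
      .setMaxBins(32)
      .setMinInstancesPerNode(1)
      .setMinInfoGain(0.0)
      .setImpurity("gini")
      .setSeed(42L)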
spark git commit: [SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for ML"
Repository: spark Updated Branches: refs/heads/master 8ddbc431d -> 0698e6c88 [SPARK-20606][ML] Revert "[] ML 2.2 QA: Remove deprecated methods for ML" This reverts commit b8733e0ad9f5a700f385e210450fd2c10137293e. Author: Yanbo Liang Closes #17944 from yanboliang/spark-20606-revert. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0698e6c8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0698e6c8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0698e6c8 Branch: refs/heads/master Commit: 0698e6c88ca11fdfd6e5498cab784cf6dbcdfacb Parents: 8ddbc43 Author: Yanbo Liang Authored: Thu May 11 14:48:13 2017 +0800 Committer: Yanbo Liang Committed: Thu May 11 14:48:13 2017 +0800 -- .../classification/DecisionTreeClassifier.scala | 18 ++-- .../spark/ml/classification/GBTClassifier.scala | 24 ++--- .../classification/RandomForestClassifier.scala | 24 ++--- .../ml/regression/DecisionTreeRegressor.scala | 18 ++-- .../spark/ml/regression/GBTRegressor.scala | 24 ++--- .../ml/regression/RandomForestRegressor.scala | 24 ++--- .../org/apache/spark/ml/tree/treeParams.scala | 105 +++ .../org/apache/spark/ml/util/ReadWrite.scala| 16 +++ project/MimaExcludes.scala | 68 python/pyspark/ml/util.py | 32 ++ 10 files changed, 219 insertions(+), 134 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0698e6c8/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index 5fb105c..9f60f08 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -54,27 +54,27 @@ class DecisionTreeClassifier @Since("1.4.0") ( /** @group setParam */ @Since("1.4.0") - def setMaxDepth(value: Int): this.type = set(maxDepth, value) + override def setMaxDepth(value: Int): this.type = set(maxDepth, value) /** @group setParam */ @Since("1.4.0") - def setMaxBins(value: Int): this.type = set(maxBins, value) + override def setMaxBins(value: Int): this.type = set(maxBins, value) /** @group setParam */ @Since("1.4.0") - def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value) + override def setMinInstancesPerNode(value: Int): this.type = set(minInstancesPerNode, value) /** @group setParam */ @Since("1.4.0") - def setMinInfoGain(value: Double): this.type = set(minInfoGain, value) + override def setMinInfoGain(value: Double): this.type = set(minInfoGain, value) /** @group expertSetParam */ @Since("1.4.0") - def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value) + override def setMaxMemoryInMB(value: Int): this.type = set(maxMemoryInMB, value) /** @group expertSetParam */ @Since("1.4.0") - def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value) + override def setCacheNodeIds(value: Boolean): this.type = set(cacheNodeIds, value) /** * Specifies how often to checkpoint the cached node IDs.
@@ -86,15 +86,15 @@ class DecisionTreeClassifier @Since("1.4.0") ( * @group setParam */ @Since("1.4.0") - def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value) + override def setCheckpointInterval(value: Int): this.type = set(checkpointInterval, value) /** @group setParam */ @Since("1.4.0") - def setImpurity(value: String): this.type = set(impurity, value) + override def setImpurity(value: String): this.type = set(impurity, value) /** @group setParam */ @Since("1.6.0") - def setSeed(value: Long): this.type = set(seed, value) + override def setSeed(value: Long): this.type = set(seed, value) override protected def train(dataset: Dataset[_]): DecisionTreeClassificationModel = { val categoricalFeatures: Map[Int, Int] = http://git-wip-us.apache.org/repos/asf/spark/blob/0698e6c8/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 263ed10..ade0960 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++