spark git commit: [SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL functions
Repository: spark Updated Branches: refs/heads/branch-2.0 88603bd4f -> 7ef1d1c61 [SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL functions This PR adds `map_keys` and `map_values` SQL functions in order to remove Hive fallback. Pass the Jenkins tests including new testcases. Author: Dongjoon Hyun Closes #13967 from dongjoon-hyun/SPARK-16278. (cherry picked from commit 54b27c1797fcd32b3f3e9d44e1a149ae396a61e6) Signed-off-by: Reynold Xin Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7ef1d1c6 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7ef1d1c6 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7ef1d1c6 Branch: refs/heads/branch-2.0 Commit: 7ef1d1c618100313dbbdb6f615d9f87ff67e895d Parents: 88603bd Author: Dongjoon Hyun Authored: Sun Jul 3 16:59:40 2016 +0800 Committer: Reynold Xin Committed: Thu Jul 7 21:02:50 2016 -0700 -- .../catalyst/analysis/FunctionRegistry.scala| 2 + .../expressions/collectionOperations.scala | 48 .../expressions/CollectionFunctionsSuite.scala | 13 ++ .../spark/sql/DataFrameFunctionsSuite.scala | 16 +++ .../spark/sql/hive/HiveSessionCatalog.scala | 1 - 5 files changed, 79 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7ef1d1c6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 95be0d6..27c3a09 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -170,6 +170,8 @@ object FunctionRegistry { expression[IsNotNull]("isnotnull"), expression[Least]("least"), expression[CreateMap]("map"), +expression[MapKeys]("map_keys"), 
+expression[MapValues]("map_values"), expression[CreateNamedStruct]("named_struct"), expression[NaNvl]("nanvl"), expression[NullIf]("nullif"), http://git-wip-us.apache.org/repos/asf/spark/blob/7ef1d1c6/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index c71cb73..2e8ea11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -44,6 +44,54 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType } /** + * Returns an unordered array containing the keys of the map. + */ +@ExpressionDescription( + usage = "_FUNC_(map) - Returns an unordered array containing the keys of the map.", + extended = " > SELECT _FUNC_(map(1, 'a', 2, 'b'));\n [1,2]") +case class MapKeys(child: Expression) + extends UnaryExpression with ExpectsInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(MapType) + + override def dataType: DataType = ArrayType(child.dataType.asInstanceOf[MapType].keyType) + + override def nullSafeEval(map: Any): Any = { +map.asInstanceOf[MapData].keyArray() + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).keyArray();") + } + + override def prettyName: String = "map_keys" +} + +/** + * Returns an unordered array containing the values of the map. 
+ */ +@ExpressionDescription( + usage = "_FUNC_(map) - Returns an unordered array containing the values of the map.", + extended = " > SELECT _FUNC_(map(1, 'a', 2, 'b'));\n [\"a\",\"b\"]") +case class MapValues(child: Expression) + extends UnaryExpression with ExpectsInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(MapType) + + override def dataType: DataType = ArrayType(child.dataType.asInstanceOf[MapType].valueType) + + override def nullSafeEval(map: Any): Any = { +map.asInstanceOf[MapData].valueArray() + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).valueArray();") + } + + override def prettyName: String = "map_values" +} + +/** * Sorts the input array in
spark git commit: [SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL functions
Repository: spark Updated Branches: refs/heads/master ea990f969 -> 54b27c179 [SPARK-16278][SPARK-16279][SQL] Implement map_keys/map_values SQL functions ## What changes were proposed in this pull request? This PR adds `map_keys` and `map_values` SQL functions in order to remove Hive fallback. ## How was this patch tested? Pass the Jenkins tests including new testcases. Author: Dongjoon Hyun Closes #13967 from dongjoon-hyun/SPARK-16278. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/54b27c17 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/54b27c17 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/54b27c17 Branch: refs/heads/master Commit: 54b27c1797fcd32b3f3e9d44e1a149ae396a61e6 Parents: ea990f9 Author: Dongjoon Hyun Authored: Sun Jul 3 16:59:40 2016 +0800 Committer: Wenchen Fan Committed: Sun Jul 3 16:59:40 2016 +0800 -- .../catalyst/analysis/FunctionRegistry.scala| 2 + .../expressions/collectionOperations.scala | 48 .../expressions/CollectionFunctionsSuite.scala | 13 ++ .../spark/sql/DataFrameFunctionsSuite.scala | 16 +++ .../spark/sql/hive/HiveSessionCatalog.scala | 1 - 5 files changed, 79 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/54b27c17/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index 26b0c30..e7f335f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -171,6 +171,8 @@ object FunctionRegistry { expression[IsNotNull]("isnotnull"), expression[Least]("least"), expression[CreateMap]("map"), +expression[MapKeys]("map_keys"), 
+expression[MapValues]("map_values"), expression[CreateNamedStruct]("named_struct"), expression[NaNvl]("nanvl"), expression[NullIf]("nullif"), http://git-wip-us.apache.org/repos/asf/spark/blob/54b27c17/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala index c71cb73..2e8ea11 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala @@ -44,6 +44,54 @@ case class Size(child: Expression) extends UnaryExpression with ExpectsInputType } /** + * Returns an unordered array containing the keys of the map. + */ +@ExpressionDescription( + usage = "_FUNC_(map) - Returns an unordered array containing the keys of the map.", + extended = " > SELECT _FUNC_(map(1, 'a', 2, 'b'));\n [1,2]") +case class MapKeys(child: Expression) + extends UnaryExpression with ExpectsInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(MapType) + + override def dataType: DataType = ArrayType(child.dataType.asInstanceOf[MapType].keyType) + + override def nullSafeEval(map: Any): Any = { +map.asInstanceOf[MapData].keyArray() + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).keyArray();") + } + + override def prettyName: String = "map_keys" +} + +/** + * Returns an unordered array containing the values of the map. 
+ */ +@ExpressionDescription( + usage = "_FUNC_(map) - Returns an unordered array containing the values of the map.", + extended = " > SELECT _FUNC_(map(1, 'a', 2, 'b'));\n [\"a\",\"b\"]") +case class MapValues(child: Expression) + extends UnaryExpression with ExpectsInputTypes { + + override def inputTypes: Seq[AbstractDataType] = Seq(MapType) + + override def dataType: DataType = ArrayType(child.dataType.asInstanceOf[MapType].valueType) + + override def nullSafeEval(map: Any): Any = { +map.asInstanceOf[MapData].valueArray() + } + + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { +nullSafeCodeGen(ctx, ev, c => s"${ev.value} = ($c).valueArray();") + } + + override def prettyName: String = "map_values" +} + +/** * Sorts the input array in ascending / descending order according to