Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/21073#discussion_r192918429
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
@@ -116,6 +117,161 @@ case class MapValues(child: Expression)
override def prettyName: String = "map_values"
}
+/**
+ * Returns the union of all the given maps.
+ */
+@ExpressionDescription(
+usage = "_FUNC_(map, ...) - Returns the union of all the given maps",
+examples = """
+ Examples:
+ > SELECT _FUNC_(map(1, 'a', 2, 'b'), map(2, 'c', 3, 'd'));
+ [[1 -> "a"], [2 -> "c"], [3 -> "d"]]
+ """, since = "2.4.0")
+case class MapConcat(children: Seq[Expression]) extends Expression {
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ // check key types and value types separately to allow valueContainsNull to vary
+ if (children.exists(!_.dataType.isInstanceOf[MapType])) {
+ TypeCheckResult.TypeCheckFailure(
+ s"The given input of function $prettyName should all be of type
map, " +
+ "but they are " +
children.map(_.dataType.simpleString).mkString("[", ", ", "]"))
+ } else if
(children.map(_.dataType.asInstanceOf[MapType].keyType).distinct.length > 1) {
+ TypeCheckResult.TypeCheckFailure(
+ s"The given input maps of function $prettyName should all be the
same type, " +
+ "but they are " +
children.map(_.dataType.simpleString).mkString("[", ", ", "]"))
+ } else if
(children.map(_.dataType.asInstanceOf[MapType].valueType).distinct.length > 1) {
+ TypeCheckResult.TypeCheckFailure(
+ s"The given input maps of function $prettyName should all be the
same type, " +
+ "but they are " +
children.map(_.dataType.simpleString).mkString("[", ", ", "]"))
+ } else {
+ TypeCheckResult.TypeCheckSuccess
+ }
+ }
+
+ override def dataType: MapType = {
+ MapType(
+ keyType = children.headOption
+
.map(_.dataType.asInstanceOf[MapType].keyType).getOrElse(StringType),
+ valueType = children.headOption
+
.map(_.dataType.asInstanceOf[MapType].valueType).getOrElse(StringType),
+ valueContainsNull = children.map { c =>
+ c.dataType.asInstanceOf[MapType]
+ }.exists(_.valueContainsNull)
+ )
+ }
+
+ override def nullable: Boolean = children.exists(_.nullable)
+
+ override def eval(input: InternalRow): Any = {
+ val union = new util.LinkedHashMap[Any, Any]()
+ children.map(_.eval(input)).foreach { raw =>
+ if (raw == null) {
+ return null
+ }
+ val map = raw.asInstanceOf[MapData]
+ map.foreach(dataType.keyType, dataType.valueType, (k, v) =>
+ union.put(k, v)
+ )
--- End diff --
@bersprockets Hi, thanks for the investigation. As with `CreateMap`, we don't
need to handle duplicate keys for now.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]