Github user kiszk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21282#discussion_r187234431

--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala ---
@@ -118,6 +120,229 @@ case class MapValues(child: Expression)
 
   override def prettyName: String = "map_values"
 }
 
+/**
+ * Returns a map created from the given array of entries.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(arrayOfEntries) - Returns a map created from the given array of entries.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array(struct(1, 'a'), struct(2, 'b')));
+       {1:"a",2:"b"}
+  """,
+  since = "2.4.0")
+case class MapFromEntries(child: Expression) extends UnaryExpression {
+  private lazy val resolvedDataType: Option[MapType] = child.dataType match {
+    case ArrayType(
+      StructType(Array(
+        StructField(_, keyType, false, _),
+        StructField(_, valueType, valueNullable, _))),
+      false) => Some(MapType(keyType, valueType, valueNullable))
+    case _ => None
+  }
+
+  override def dataType: MapType = resolvedDataType.get
+
+  override def checkInputDataTypes(): TypeCheckResult = resolvedDataType match {
+    case Some(_) => TypeCheckResult.TypeCheckSuccess
+    case None => TypeCheckResult.TypeCheckFailure(s"'${child.sql}' is of " +
+      s"${child.dataType.simpleString} type. $prettyName accepts only null-free arrays " +
+      "of pair structs. Values of the first struct field can't be null or " +
+      "contain duplicates.")
+  }
+
+  override protected def nullSafeEval(input: Any): Any = {
+    val arrayData = input.asInstanceOf[ArrayData]
+    val length = arrayData.numElements()
+    val keyArray = new Array[AnyRef](length)
+    val keySet = new OpenHashSet[AnyRef]()
+    val valueArray = new Array[AnyRef](length)
+    var i = 0
+    while (i < length) {
+      val entry = arrayData.getStruct(i, 2)
+      val key = entry.get(0, dataType.keyType)
+      if (key == null) {
+        throw new RuntimeException("The first field from a struct (key) can't be null.")
+      }
+      if (keySet.contains(key)) {
--- End diff --

Is this check necessary for now? Other operations (e.g. `CreateMap`) allow creating a map with duplicated keys. Would it be better to be consistent across Spark?
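For context, here is a minimal sketch of the inconsistency in question, written as a hypothetical spark-shell session against the PR branch. The `map_from_entries` call and its failure mode are assumptions based on the truncated diff above, not merged behavior:

    // CreateMap (the `map` SQL function) accepts a duplicated key today:
    // no error is raised, both entries are kept in the map data, and
    // duplicate-key semantics are currently undefined.
    spark.sql("SELECT map(1, 'a', 1, 'b')").show()

    // With the keySet check in this diff, the equivalent input to
    // map_from_entries would instead throw a RuntimeException at evaluation:
    // spark.sql("SELECT map_from_entries(array(struct(1, 'a'), struct(1, 'b')))").show()

If consistency wins, dropping the `keySet` check would make `map_from_entries` match `CreateMap`'s permissive behavior; if strictness wins, `CreateMap` would arguably need the same guard.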