Github user mn-mikke commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21236#discussion_r187813418
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
    @@ -118,6 +119,161 @@ case class MapValues(child: Expression)
       override def prettyName: String = "map_values"
     }
     
    +/**
    + * Returns an unordered array of all entries in the given map.
    + */
    +@ExpressionDescription(
    +  usage = "_FUNC_(map) - Returns an unordered array of all entries in the 
given map.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(1, 'a', 2, 'b'));
    +       [(1,"a"),(2,"b")]
    +  """,
    +  since = "2.4.0")
    +case class MapEntries(child: Expression) extends UnaryExpression with 
ExpectsInputTypes {
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(MapType)
    +
    +  lazy val childDataType: MapType = child.dataType.asInstanceOf[MapType]
    +
    +  override def dataType: DataType = {
    +    ArrayType(
    +      StructType(
    +        StructField("key", childDataType.keyType, false) ::
    +        StructField("value", childDataType.valueType, 
childDataType.valueContainsNull) ::
    +        Nil),
    +      false)
    +  }
    +
    +  override protected def nullSafeEval(input: Any): Any = {
    +    val childMap = input.asInstanceOf[MapData]
    +    val keys = childMap.keyArray()
    +    val values = childMap.valueArray()
    +    val length = childMap.numElements()
    +    val resultData = new Array[AnyRef](length)
    +    var i = 0;
    +    while (i < length) {
    +      val key = keys.get(i, childDataType.keyType)
    +      val value = values.get(i, childDataType.valueType)
    +      val row = new GenericInternalRow(Array[Any](key, value))
    +      resultData.update(i, row)
    +      i += 1
    +    }
    +    new GenericArrayData(resultData)
    +  }
    +
    +  override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
    +    nullSafeCodeGen(ctx, ev, c => {
    +      val numElements = ctx.freshName("numElements")
    +      val keys = ctx.freshName("keys")
    +      val values = ctx.freshName("values")
    +      val isKeyPrimitive = 
CodeGenerator.isPrimitiveType(childDataType.keyType)
    +      val isValuePrimitive = 
CodeGenerator.isPrimitiveType(childDataType.valueType)
    +      val code = if (isKeyPrimitive && isValuePrimitive) {
    +        genCodeForPrimitiveElements(ctx, keys, values, ev.value, 
numElements)
    +      } else {
    +        genCodeForAnyElements(ctx, keys, values, ev.value, numElements)
    +      }
    +      s"""
    +         |final int $numElements = $c.numElements();
    +         |final ArrayData $keys = $c.keyArray();
    +         |final ArrayData $values = $c.valueArray();
    +         |$code
    +       """.stripMargin
    +    })
    +  }
    +
    +  private def getKey(varName: String) = CodeGenerator.getValue(varName, 
childDataType.keyType, "z")
    +
    +  private def getValue(varName: String) = {
    +    CodeGenerator.getValue(varName, childDataType.valueType, "z")
    +  }
    +
    +  private def genCodeForPrimitiveElements(
    +      ctx: CodegenContext,
    +      keys: String,
    +      values: String,
    +      arrayData: String,
    +      numElements: String): String = {
    +    val byteArraySize = ctx.freshName("byteArraySize")
    +    val data = ctx.freshName("byteArray")
    +    val unsafeRow = ctx.freshName("unsafeRow")
    +    val unsafeArrayData = ctx.freshName("unsafeArrayData")
    +    val structsOffset = ctx.freshName("structsOffset")
    +    val calculateArraySize = 
"UnsafeArrayData.calculateSizeOfUnderlyingByteArray"
    +    val calculateHeader = "UnsafeArrayData.calculateHeaderPortionInBytes"
    +
    +    val baseOffset = Platform.BYTE_ARRAY_OFFSET
    +    val longSize = LongType.defaultSize
    +    val structSize = UnsafeRow.calculateBitSetWidthInBytes(2) + longSize * 
2
    --- End diff --
    
    @kiszk Thanks for your suggestion, but I think 
`LongType.defaultSize` can be used in this case. The purpose of 
`defaultSize` does not seem to be limited to estimating data size for 
statistics: `GenerateUnsafeProjection.writeArrayToBuffer`, 
`InterpretedUnsafeProjection.getElementSize` and other parts of the code base 
use `defaultSize` in the same way.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to