Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/23124#discussion_r235866111 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -646,34 +633,35 @@ case class MapConcat(children: Seq[Expression]) extends ComplexTypeMergingExpres val mapMerge = s""" - |${ev.isNull} = $hasNullName; - |if (!${ev.isNull}) { - | $arrayDataClass[] $keyArgsName = new $arrayDataClass[${mapCodes.size}]; - | $arrayDataClass[] $valArgsName = new $arrayDataClass[${mapCodes.size}]; - | long $numElementsName = 0; - | for (int $idxName = 0; $idxName < $argsName.length; $idxName++) { - | $keyArgsName[$idxName] = $argsName[$idxName].keyArray(); - | $valArgsName[$idxName] = $argsName[$idxName].valueArray(); - | $numElementsName += $argsName[$idxName].numElements(); - | } - | if ($numElementsName > ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) { - | throw new RuntimeException("Unsuccessful attempt to concat maps with " + - | $numElementsName + " elements due to exceeding the map size limit " + - | "${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}."); - | } - | $arrayDataClass $finKeysName = $keyConcat($keyArgsName, - | (int) $numElementsName); - | $arrayDataClass $finValsName = $valueConcat($valArgsName, - | (int) $numElementsName); - | ${ev.value} = new $arrayBasedMapDataClass($finKeysName, $finValsName); + |ArrayData[] $keyArgsName = new ArrayData[${mapCodes.size}]; + |ArrayData[] $valArgsName = new ArrayData[${mapCodes.size}]; + |long $numElementsName = 0; + |for (int $idxName = 0; $idxName < $argsName.length; $idxName++) { + | $keyArgsName[$idxName] = $argsName[$idxName].keyArray(); + | $valArgsName[$idxName] = $argsName[$idxName].valueArray(); + | $numElementsName += $argsName[$idxName].numElements(); |} + |if ($numElementsName > ${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) { --- End diff -- this check is not really correct, as we are not considering duplicates IIUC. I think we can change this behavior using `putAll` and checking the size in the loop.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org