Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/23124#discussion_r235866111
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
    @@ -646,34 +633,35 @@ case class MapConcat(children: Seq[Expression]) 
extends ComplexTypeMergingExpres
     
         val mapMerge =
           s"""
    -        |${ev.isNull} = $hasNullName;
    -        |if (!${ev.isNull}) {
    -        |  $arrayDataClass[] $keyArgsName = new 
$arrayDataClass[${mapCodes.size}];
    -        |  $arrayDataClass[] $valArgsName = new 
$arrayDataClass[${mapCodes.size}];
    -        |  long $numElementsName = 0;
    -        |  for (int $idxName = 0; $idxName < $argsName.length; $idxName++) 
{
    -        |    $keyArgsName[$idxName] = $argsName[$idxName].keyArray();
    -        |    $valArgsName[$idxName] = $argsName[$idxName].valueArray();
    -        |    $numElementsName += $argsName[$idxName].numElements();
    -        |  }
    -        |  if ($numElementsName > 
${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) {
    -        |    throw new RuntimeException("Unsuccessful attempt to concat 
maps with " +
    -        |       $numElementsName + " elements due to exceeding the map 
size limit " +
    -        |       "${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}.");
    -        |  }
    -        |  $arrayDataClass $finKeysName = $keyConcat($keyArgsName,
    -        |    (int) $numElementsName);
    -        |  $arrayDataClass $finValsName = $valueConcat($valArgsName,
    -        |    (int) $numElementsName);
    -        |  ${ev.value} = new $arrayBasedMapDataClass($finKeysName, 
$finValsName);
    +        |ArrayData[] $keyArgsName = new ArrayData[${mapCodes.size}];
    +        |ArrayData[] $valArgsName = new ArrayData[${mapCodes.size}];
    +        |long $numElementsName = 0;
    +        |for (int $idxName = 0; $idxName < $argsName.length; $idxName++) {
    +        |  $keyArgsName[$idxName] = $argsName[$idxName].keyArray();
    +        |  $valArgsName[$idxName] = $argsName[$idxName].valueArray();
    +        |  $numElementsName += $argsName[$idxName].numElements();
             |}
    +        |if ($numElementsName > 
${ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH}) {
    --- End diff --
    
    this check is not really correct, as we are not considering duplicates 
IIUC. I think we can change this behavior using `putAll` and checking the size 
in the loop.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to