Github user mgaido91 commented on a diff in the pull request: https://github.com/apache/spark/pull/23217#discussion_r238690465 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/ArrayBasedMapBuilder.scala --- @@ -47,13 +48,17 @@ class ArrayBasedMapBuilder(keyType: DataType, valueType: DataType) extends Seria private lazy val keyGetter = InternalRow.getAccessor(keyType) private lazy val valueGetter = InternalRow.getAccessor(valueType) - def put(key: Any, value: Any): Unit = { + def put(key: Any, value: Any, withSizeCheck: Boolean = false): Unit = { if (key == null) { throw new RuntimeException("Cannot use null as map key.") } val index = keyToIndex.getOrDefault(key, -1) if (index == -1) { + if (withSizeCheck && size >= ByteArrayMethods.MAX_ROUNDED_ARRAY_LENGTH) { --- End diff -- This flag is just for performance reasons: we can skip the check in some conditions, and I didn't want to introduce performance overhead when it is not needed. If we remove the flag, we would do the comparison for each item, even when it is not needed.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org