jiangxb1987 commented on a change in pull request #27968:
[SPARK-31202][CORE]Improve SizeEstimator for AppendOnlyMap
URL: https://github.com/apache/spark/pull/27968#discussion_r396917895
##########
File path: core/src/main/scala/org/apache/spark/util/SizeEstimator.scala
##########
@@ -290,6 +306,88 @@ object SizeEstimator extends Logging {
size
}
+
+ /**
+ * Visits the data array of an AppendOnlyMap, which stores all of its key-value pairs.
+ * This array is handled separately because its elements have different underlying types
+ * and their sizes may vary significantly; for example, a value may be an array-like
+ * buffer that stores merged or grouped values for aggregation.
+ */
+ private def visitKVDataArray(
+ data: Array[AnyRef],
+ keyPositions: java.util.BitSet,
+ totalValueElements: Int,
+ state: SearchState): Unit = {
+ val length = data.length
+ var arrSize: Long = alignSize(objectSize + INT_SIZE)
+ state.size += arrSize
+ state.size += alignSize((length - keyPositions.size) * pointerSize)
+
+ if (length <= ARRAY_SIZE_FOR_SAMPLING) {
+ for (e <- data) {
+ state.enqueue(e)
+ }
+ } else {
+ val rand = new Random(42)
+ val drawn = new OpenHashSet[Int](2 * ARRAY_SAMPLE_SIZE)
+ val (numKeys1, keySize1, numValueElements1, valueSize1) =
+ sampleKVDataArray(data, keyPositions, state, rand, drawn, length)
+ val (numKeys2, keySize2, numValueElements2, valueSize2) =
+ sampleKVDataArray(data, keyPositions, state, rand, drawn, length)
+ val (_, keySizeForMax, numKeysForMin, keySizeForMin) = if (keySize1 >
keySize2) {
+ (numKeys1, keySize1, numKeys2, keySize2)
+ } else (numKeys2, keySize2, numKeys1, keySize1)
+ val keySize = keySizeForMax + (keySizeForMin *
Review comment:
What does this try to do?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]