Github user a-roberts commented on a diff in the pull request:
https://github.com/apache/spark/pull/16196#discussion_r91301705
--- Diff: core/src/main/scala/org/apache/spark/util/SizeEstimator.scala ---
@@ -89,7 +90,13 @@ object SizeEstimator extends Logging {
// A cache of ClassInfo objects for each class
// We use weakKeys to allow GC of dynamically created classes
- private val classInfos = new MapMaker().weakKeys().makeMap[Class[_],
ClassInfo]()
+ private val classInfos = new ThreadLocal[WeakHashMap[Class[_],
ClassInfo]] {
+ override def initialValue(): java.util.WeakHashMap[Class[_],
ClassInfo] = {
+ val toReturn = new WeakHashMap[Class[_], ClassInfo]()
+ toReturn.put(classOf[Object], new ClassInfo(objectSize, new
Array[Int](0)))
+ return toReturn
--- End diff --
Built and profiled: run times average 42 seconds with the initial commit and
45 seconds with this; with no changes they average 48 seconds.
My code as a diff (using a ConcurrentMap built with MapMaker, and a var rather
than a val so that we can initialise it later) is provided here:
```
import java.lang.management.ManagementFactory
import java.lang.reflect.{Field, Modifier}
import java.util.{IdentityHashMap, WeakHashMap}
-import java.util.concurrent.ThreadLocalRandom
+import java.util.concurrent.{ThreadLocalRandom, ConcurrentMap}
import scala.collection.mutable.ArrayBuffer
import scala.concurrent.util.Unsafe
@@ -88,16 +88,6 @@ object SizeEstimator extends Logging {
// TODO: Is this arch dependent ?
private val ALIGN_SIZE = 8
- // A cache of ClassInfo objects for each class
- // We use weakKeys to allow GC of dynamically created classes
- private val classInfos = new ThreadLocal[WeakHashMap[Class[_],
ClassInfo]] {
- override def initialValue(): java.util.WeakHashMap[Class[_],
ClassInfo] = {
- val toReturn = new WeakHashMap[Class[_], ClassInfo]()
- toReturn.put(classOf[Object], new ClassInfo(objectSize, new
Array[Int](0)))
- return toReturn
- }
- }
-
// Object and pointer sizes are arch dependent
private var is64bit = false
@@ -109,6 +99,8 @@ object SizeEstimator extends Logging {
// Minimum size of a java.lang.Object
private var objectSize = 8
+ private var classInfos: ConcurrentMap[Class[_], ClassInfo] = null
+
initialize()
// Sets object size, pointer size based on architecture and
CompressedOops settings
@@ -126,6 +118,9 @@ object SizeEstimator extends Logging {
}
}
pointerSize = if (is64bit && !isCompressedOops) 8 else 4
+
+ classInfos = new MapMaker().weakKeys().makeMap[Class[_], ClassInfo]()
+ classInfos.put(classOf[Object], new ClassInfo(objectSize, new
Array[Int](0)))
}
private def getIsCompressedOops: Boolean = {
@@ -338,7 +333,7 @@ object SizeEstimator extends Logging {
*/
private def getClassInfo(cls: Class[_]): ClassInfo = {
// Check whether we've already cached a ClassInfo for this class
- val info = classInfos.get().get(cls)
+ val info = classInfos.get(cls)
if (info != null) {
return info
}
@@ -371,7 +366,7 @@ object SizeEstimator extends Logging {
// Create and cache a new ClassInfo
val newInfo = new ClassInfo(shellSize, fieldOffsets.toArray)
- classInfos.get().put(cls, newInfo)
+ classInfos.put(cls, newInfo)
newInfo
}
```
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to enable it, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]