Github user a-roberts commented on a diff in the pull request:

    https://github.com/apache/spark/pull/16196#discussion_r91301705
  
    --- Diff: core/src/main/scala/org/apache/spark/util/SizeEstimator.scala ---
    @@ -89,7 +90,13 @@ object SizeEstimator extends Logging {
     
       // A cache of ClassInfo objects for each class
       // We use weakKeys to allow GC of dynamically created classes
    -  private val classInfos = new MapMaker().weakKeys().makeMap[Class[_], 
ClassInfo]()
    +  private val classInfos = new ThreadLocal[WeakHashMap[Class[_], 
ClassInfo]] {
    +    override def initialValue(): java.util.WeakHashMap[Class[_], 
ClassInfo] = {
    +      val toReturn = new WeakHashMap[Class[_], ClassInfo]()
    +      toReturn.put(classOf[Object], new ClassInfo(objectSize, new 
Array[Int](0)))
    +      return toReturn
    --- End diff --
    
    Built and profiled: run times average 42 seconds with the initial commit and 
45 seconds with this version, compared with 48 seconds with no changes.
    
    My code as a diff (using a ConcurrentMap built by MapMaker, held in a var rather 
than a val so we can initialise it later) is provided here:
    
    ```
     import java.lang.management.ManagementFactory
     import java.lang.reflect.{Field, Modifier}
     import java.util.{IdentityHashMap, WeakHashMap}
    -import java.util.concurrent.ThreadLocalRandom
    +import java.util.concurrent.{ThreadLocalRandom, ConcurrentMap}
    
     import scala.collection.mutable.ArrayBuffer
     import scala.concurrent.util.Unsafe
    @@ -88,16 +88,6 @@ object SizeEstimator extends Logging {
       // TODO: Is this arch dependent ?
       private val ALIGN_SIZE = 8
    
    -  // A cache of ClassInfo objects for each class
    -  // We use weakKeys to allow GC of dynamically created classes
    -  private val classInfos = new ThreadLocal[WeakHashMap[Class[_], 
ClassInfo]] {
    -    override def initialValue(): java.util.WeakHashMap[Class[_], 
ClassInfo] = {
    -      val toReturn = new WeakHashMap[Class[_], ClassInfo]()
    -      toReturn.put(classOf[Object], new ClassInfo(objectSize, new 
Array[Int](0)))
    -      return toReturn
    -    }
    -  }
    -
       // Object and pointer sizes are arch dependent
       private var is64bit = false
    
    @@ -109,6 +99,8 @@ object SizeEstimator extends Logging {
       // Minimum size of a java.lang.Object
       private var objectSize = 8
    
    +  private var classInfos: ConcurrentMap[Class[_], ClassInfo] = null
    +
       initialize()
    
       // Sets object size, pointer size based on architecture and 
CompressedOops settings
    @@ -126,6 +118,9 @@ object SizeEstimator extends Logging {
           }
         }
         pointerSize = if (is64bit && !isCompressedOops) 8 else 4
    +
    +    classInfos = new MapMaker().weakKeys().makeMap[Class[_], ClassInfo]()
    +    classInfos.put(classOf[Object], new ClassInfo(objectSize, new 
Array[Int](0)))
       }
    
       private def getIsCompressedOops: Boolean = {
    @@ -338,7 +333,7 @@ object SizeEstimator extends Logging {
        */
       private def getClassInfo(cls: Class[_]): ClassInfo = {
         // Check whether we've already cached a ClassInfo for this class
    -    val info = classInfos.get().get(cls)
    +    val info = classInfos.get(cls)
         if (info != null) {
           return info
         }
    @@ -371,7 +366,7 @@ object SizeEstimator extends Logging {
    
         // Create and cache a new ClassInfo
         val newInfo = new ClassInfo(shellSize, fieldOffsets.toArray)
    -    classInfos.get().put(cls, newInfo)
    +    classInfos.put(cls, newInfo)
         newInfo
       }
    ```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes to have it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to