Copilot commented on code in PR #54572:
URL: https://github.com/apache/spark/pull/54572#discussion_r2872593232


##########
core/src/test/scala/org/apache/spark/util/SizeEstimatorSuite.scala:
##########
@@ -243,4 +247,76 @@ class SizeEstimatorSuite
     assertResult(2015)(SizeEstimator.estimate(new DummyClass8))
     assertResult(20206)(SizeEstimator.estimate(Array.fill(10)(new 
DummyClass8)))
   }
+
+  // Tests for JEP 450/519: Compact Object Headers
+  // With compact object headers, the object header is 8 bytes (vs. 12 with 
compressed oops,
+  // or 16 without) because the class pointer is encoded inside the mark word.
+  // Object references are still 4 bytes (like compressed oops).
+
+  test("64-bit arch with compact object headers: simple classes") {
+    reinitializeSizeEstimator("amd64", "true", "true")
+    // objectSize = 8 (compact header), pointerSize = 4
+    // DummyClass1: 8-byte header, no fields => 8 bytes
+    assertResult(8)(SizeEstimator.estimate(new DummyClass1))
+    // DummyClass2: 8-byte header + Int(4) => 12, aligned to 16
+    assertResult(16)(SizeEstimator.estimate(new DummyClass2))
+    // DummyClass3: 8-byte header + Int(4) + Double(8) => 20, aligned to 24
+    assertResult(24)(SizeEstimator.estimate(new DummyClass3))
+    // DummyClass4: 8-byte header + Int(4) + pointer(4) => 16, aligned to 16
+    assertResult(16)(SizeEstimator.estimate(new DummyClass4(null)))
+    // DummyClass4 with DummyClass3: 16 + 24 = 40
+    assertResult(40)(SizeEstimator.estimate(new DummyClass4(new DummyClass3)))
+  }
+
+  test("64-bit arch with compact object headers: primitive wrapper objects") {
+    reinitializeSizeEstimator("amd64", "true", "true")
+    // Boolean/Byte/Char/Short/Int/Float wrappers: 8-byte header + primitive 
up to 4 bytes => 16
+    assertResult(16)(SizeEstimator.estimate(java.lang.Boolean.TRUE))
+    assertResult(16)(SizeEstimator.estimate(java.lang.Byte.valueOf("1")))
+    assertResult(16)(SizeEstimator.estimate(java.lang.Character.valueOf('1')))
+    assertResult(16)(SizeEstimator.estimate(java.lang.Short.valueOf("1")))
+    assertResult(16)(SizeEstimator.estimate(java.lang.Integer.valueOf(1)))
+    assertResult(16)(SizeEstimator.estimate(java.lang.Float.valueOf(1.0f)))
+    // Long/Double wrappers: 8-byte header + 8-byte primitive => 16
+    assertResult(16)(SizeEstimator.estimate(java.lang.Long.valueOf(1)))
+    assertResult(16)(SizeEstimator.estimate(java.lang.Double.valueOf(1.0)))
+  }
+
+  test("64-bit arch with compact object headers: primitive arrays") {
+    reinitializeSizeEstimator("amd64", "true", "true")
+    // Array header = objectSize(8) + length Int(4) = 12, aligned to 16
+    // Array[Byte](10): 16 + alignSize(10*1=10) = 16 + 16 = 32
+    assertResult(32)(SizeEstimator.estimate(new Array[Byte](10)))
+    // Array[Char](10): 16 + alignSize(10*2=20) = 16 + 24 = 40
+    assertResult(40)(SizeEstimator.estimate(new Array[Char](10)))
+    // Array[Int](10): 16 + alignSize(10*4=40) = 16 + 40 = 56
+    assertResult(56)(SizeEstimator.estimate(new Array[Int](10)))
+    // Array[Long](10): 16 + alignSize(10*8=80) = 16 + 80 = 96
+    assertResult(96)(SizeEstimator.estimate(new Array[Long](10)))
+  }
+
+  test("64-bit arch with compact object headers: strings") {
+    reinitializeSizeEstimator("amd64", "true", "true")
+    // DummyString has: pointer(arr,4) + Int(hashCode,4) + Int(hash32,4) = 12 
fields
+    // objectSize=8, fields=12 => shellSize=20, aligned to 24

Review Comment:
   The comment says "= 12 fields" but the calculation is in bytes (4 + 4 + 4 = 
12 bytes). Consider correcting the wording to avoid confusion when maintaining 
these size derivations.
   ```suggestion
       // DummyString has: pointer(arr,4) + Int(hashCode,4) + Int(hash32,4) = 
12 bytes
       // objectSize=8, fieldBytes=12 => shellSize=20, aligned to 24
   ```



##########
core/src/main/scala/org/apache/spark/util/SizeEstimator.scala:
##########
@@ -136,28 +159,38 @@ object SizeEstimator extends Logging {
       return System.getProperty("java.vm.info").contains("Compressed Ref")
     }
 
+    getHotSpotVMOption("UseCompressedOops") match {
+      case Some(value) => value.contains("true")
+      case None =>
+        // Guess whether they've enabled UseCompressedOops based on whether 
maxMemory < 32 GB
+        val guess = Runtime.getRuntime.maxMemory < (32L*1024*1024*1024)
+        logWarning(log"Failed to check whether UseCompressedOops is set; " +
+          log"assuming " + (if (guess) log"yes" else log"not"))
+        guess
+    }
+  }
+
+  /**
+   * Retrieves the string representation of a HotSpot VM option via the 
HotSpotDiagnosticMXBean.
+   * Returns [[Some]] with the option's string value on success, or [[None]] 
if the option cannot
+   * be read (e.g. on non-HotSpot JVMs or when the option does not exist).
+   */

Review Comment:
   The scaladoc for `getHotSpotVMOption` says it returns the option's string 
value, but the implementation returns `VMOption.toString` (which typically 
includes the option name/metadata). This is brittle and can break if `toString` 
formatting changes; consider reflecting on 
`com.sun.management.VMOption.getValue` (similar to `Utils.checkUseGC`) and 
returning just the value, or update the scaladoc to match the behavior.



##########
core/src/main/scala/org/apache/spark/util/SizeEstimator.scala:
##########
@@ -97,32 +96,56 @@ object SizeEstimator extends Logging {
   // Size of an object reference
   // Based on https://wikis.oracle.com/display/HotSpotInternals/CompressedOops
   private var isCompressedOops = false
+
+  // Whether Compact Object Headers (JEP 450/519) are enabled.
+  // With Compact Object Headers, the object header is 8 bytes on 64-bit JVMs
+  // (the class pointer is encoded inside the mark word), so objectSize = 8
+  // and pointerSize = 4 regardless of UseCompressedOops.
+  private var isCompactObjectHeaders = false
+
   private var pointerSize = 4
 
   // Minimum size of a java.lang.Object
   private var objectSize = 8
 
   initialize()
 
-  // Sets object size, pointer size based on architecture and CompressedOops 
settings
-  // from the JVM.
+  // Sets object size, pointer size based on architecture, CompressedOops
+  // and CompactObjectHeaders settings from the JVM.
   private def initialize(): Unit = {
     val arch = Utils.osArch
     is64bit = arch.contains("64") || arch.contains("s390x")
+    isCompactObjectHeaders = is64bit && getIsCompactObjectHeaders
     isCompressedOops = getIsCompressedOops
 
     objectSize = if (!is64bit) 8 else {
-      if (!isCompressedOops) {
+      if (isCompactObjectHeaders) {
+        8
+      } else if (!isCompressedOops) {
         16
       } else {
         12
       }
     }
-    pointerSize = if (is64bit && !isCompressedOops) 8 else 4
+    pointerSize = if (is64bit && !isCompressedOops && !isCompactObjectHeaders) 
8 else 4
     classInfos.clear()
     classInfos.put(classOf[Object], new ClassInfo(objectSize, Nil))
   }
 
+  private def getIsCompactObjectHeaders: Boolean = {
+    // This is only used by tests to override the detection of Compact Object 
Headers.
+    if (System.getProperty(TEST_USE_COMPACT_OBJECT_HEADERS_KEY) != null) {
+      return System.getProperty(TEST_USE_COMPACT_OBJECT_HEADERS_KEY).toBoolean
+    }
+
+    // Compact Object Headers is introduced in JEP 450/519, JDK 24/25.
+    if (Runtime.version().feature() < 24) {
+      return false
+    }
+
+    getHotSpotVMOption("UseCompactObjectHeaders").exists(_.contains("true"))
+  }

Review Comment:
   `getIsCompactObjectHeaders` currently checks 
`getHotSpotVMOption(...).exists(_.contains("true"))`. Since 
`getHotSpotVMOption` returns `VMOption.toString`, substring matching is fragile 
(the string may include multiple `true/false` tokens). Prefer extracting the 
actual option value via `com.sun.management.VMOption.getValue` and comparing it 
exactly to "true".



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to