huan233usc commented on code in PR #54667:
URL: https://github.com/apache/spark/pull/54667#discussion_r2900166517


##########
sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionTestBase.scala:
##########
@@ -425,4 +427,55 @@ abstract class StatisticsCollectionTestBase extends QueryTest with SQLTestUtils
       assert(relation.stats.attributeStats.isEmpty)
     }
   }
+
+  test("CatalogStatistics.toV2Stats") {
+    val schema = StructType(Seq(
+      StructField("id", IntegerType),
+      StructField("name", StringType)))
+
+    val idColStat = CatalogColumnStat(
+      distinctCount = Some(10),
+      min = Some("1"),
+      max = Some("100"),
+      nullCount = Some(0),
+      avgLen = Some(4),
+      maxLen = Some(4))
+
+    val catalogStats = CatalogStatistics(
+      sizeInBytes = 1024,
+      rowCount = Some(10),
+      colStats = Map(
+        "id" -> idColStat,
+        // "extra" is not in schema — should be silently skipped
+        "extra" -> CatalogColumnStat(distinctCount = Some(5))))
+
+    val v2Stats = catalogStats.toV2Stats(schema)
+
+    // sizeInBytes is always populated
+    assert(v2Stats.sizeInBytes().getAsLong === 1024L)
+
+    // numRows is present when rowCount is defined
+    assert(v2Stats.numRows().isPresent)
+    assert(v2Stats.numRows().getAsLong === 10L)
+
+    // only columns present in schema are returned; "extra" is skipped
+    val colStats = v2Stats.columnStats()
+    assert(colStats.size() === 1, "only 'id' is in schema; 'extra' should be skipped")
+    val idV2 = colStats.get(FieldReference.apply("id"))

Review Comment:
   Done — updated the lookup to `FieldReference.column("id")` to match the 
production fix. Also added a histogram round-trip test that verifies `height`, 
`lo`, `hi`, and `ndv` for each bin, plus a no-histogram case confirming 
`histogram()` returns an empty `Optional`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to