gengliangwang commented on code in PR #54667:
URL: https://github.com/apache/spark/pull/54667#discussion_r2915171148
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala:
##########
@@ -299,6 +305,75 @@ object DataSourceV2Relation {
identifier: Option[Identifier]): DataSourceV2Relation =
create(table, catalog, identifier, CaseInsensitiveStringMap.empty)
+ /**
+ * This is used to transform catalog statistics to data source v2 statistics.
+ */
+ def toV2Stats(
Review Comment:
Let's make the method name more explicit: `v1StatsToV2Stats`.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/execution/datasources/v2/DataSourceV2Relation.scala:
##########
@@ -299,6 +305,75 @@ object DataSourceV2Relation {
identifier: Option[Identifier]): DataSourceV2Relation =
create(table, catalog, identifier, CaseInsensitiveStringMap.empty)
+ /**
+ * This is used to transform catalog statistics to data source v2 statistics.
+ */
+ def toV2Stats(
+ v1Statistics: CatalogStatistics,
+ schema: StructType): V2Statistics = {
+ val typeMap = schema.fields.map(f => f.name -> f.dataType).toMap
+ val colStatsMap: Map[NamedReference, ColumnStatistics] =
+ v1Statistics.colStats.flatMap { case (name, stat) =>
+ typeMap.get(name).map { dt =>
+ FieldReference.column(name) -> toV2ColStat(stat, name, dt)
+ }
+ }
+
+ val v2SizeInBytes = OptionalLong.of(v1Statistics.sizeInBytes.longValue)
+ val v2NumRows = v1Statistics.rowCount
+ .map(v => OptionalLong.of(v.longValue)).getOrElse(OptionalLong.empty())
+ val v2ColStats = new java.util.HashMap[NamedReference, ColumnStatistics]()
+ colStatsMap.foreach { case (k, v) => v2ColStats.put(k, v) }
+
+ new V2Statistics {
+ override def sizeInBytes(): OptionalLong = v2SizeInBytes
+ override def numRows(): OptionalLong = v2NumRows
+ override def columnStats(): java.util.Map[NamedReference,
ColumnStatistics] = v2ColStats
+ }
+ }
+
+ private def toV2ColStat(
Review Comment:
Ditto: let's make this method name more explicit as well.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]