Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20624#discussion_r170474720
  
    --- Diff: 
sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveExternalCatalog.scala ---
    @@ -1059,22 +1054,22 @@ private[spark] class HiveExternalCatalog(conf: 
SparkConf, hadoopConf: Configurat
           None
         } else {
     
    -      val colStats = new mutable.HashMap[String, ColumnStat]
    -
    -      // For each column, recover its column stats. Note that this is 
currently an O(n^2) operation,
    -      // but given that the number of columns is usually not enormous, 
this is probably OK as a start.
    -      // If we want to make this a linear operation, we'd need a stronger 
contract between the
    -      // naming convention used for serialization.
    -      schema.foreach { field =>
    -        if (statsProps.contains(columnStatKeyPropName(field.name, 
ColumnStat.KEY_VERSION))) {
    -          // If "version" field is defined, then the column stat is 
defined.
    -          val keyPrefix = columnStatKeyPropName(field.name, "")
    -          val colStatMap = 
statsProps.filterKeys(_.startsWith(keyPrefix)).map { case (k, v) =>
    -            (k.drop(keyPrefix.length), v)
    -          }
    -          ColumnStat.fromMap(table, field, colStatMap).foreach { cs =>
    -            colStats += field.name -> cs
    -          }
    +      val colStats = new mutable.HashMap[String, CatalogColumnStat]
    +      val statPropsForField = new mutable.HashMap[String, 
mutable.HashMap[String, String]]
    +
    +      val colStatsProps = 
properties.filterKeys(_.startsWith(STATISTICS_COL_STATS_PREFIX)).map {
    +        case (k, v) => k.drop(STATISTICS_COL_STATS_PREFIX.length) -> v
    +      }
    +
    +      // Find all the column names by matching the KEY_VERSION properties 
for them.
    +      val fieldNames = colStatsProps.keys.filter {
    --- End diff --
    
    `fieldNames` is not being used.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to