szehon-ho commented on code in PR #5376:
URL: https://github.com/apache/iceberg/pull/5376#discussion_r949704407


##########
core/src/main/java/org/apache/iceberg/MetricsUtil.java:
##########
@@ -56,4 +64,123 @@ public static MetricsModes.MetricsMode metricsMode(
     String columnName = inputSchema.findColumnName(fieldId);
     return metricsConfig.columnMode(columnName);
   }
+
+  /**
+   * Return a readable metrics map.
+   *
+   * <p>One entry is produced per id in {@code namesById} whose field resolves in {@code schema}
+   * to a primitive type. Ids that the schema cannot resolve are skipped rather than failing. A
+   * metric is null in the resulting struct when the content file has no map for that metric or
+   * the map has no entry for the column id.
+   *
+   * @param schema schema of original data table
+   * @param namesById pre-computed map of all column ids in schema to readable name, see {@link
+   *     org.apache.iceberg.types.TypeUtil#indexNameById(Types.StructType)}
+   * @param contentFile content file with metrics
+   * @return map of readable column name to column metric, of which the bounds are made readable
+   */
+  public static Map<String, StructLike> readableMetricsMap(
+      Schema schema, Map<Integer, String> namesById, ContentFile<?> contentFile) {
+    Map<String, StructLike> metricsStruct = Maps.newHashMapWithExpectedSize(namesById.size());
+
+    // Any of these per-column maps may be null; each lookup below is guarded accordingly
+    Map<Integer, Long> columnSizes = contentFile.columnSizes();
+    Map<Integer, Long> valueCounts = contentFile.valueCounts();
+    Map<Integer, Long> nullValueCounts = contentFile.nullValueCounts();
+    Map<Integer, Long> nanValueCounts = contentFile.nanValueCounts();
+    Map<Integer, ByteBuffer> lowerBounds = contentFile.lowerBounds();
+    Map<Integer, ByteBuffer> upperBounds = contentFile.upperBounds();
+
+    for (Map.Entry<Integer, String> entry : namesById.entrySet()) {
+      int id = entry.getKey();
+      Types.NestedField field = schema.findField(id);
+      if (field == null || !field.type().isPrimitiveType()) {
+        // Skip ids the schema cannot resolve; Iceberg stores metrics only for primitive types
+        continue;
+      }
+
+      ReadableMetricsStruct struct =
+          new ReadableMetricsStruct(
+              columnSizes == null ? null : columnSizes.get(id),
+              valueCounts == null ? null : valueCounts.get(id),
+              nullValueCounts == null ? null : nullValueCounts.get(id),
+              nanValueCounts == null ? null : nanValueCounts.get(id),
+              lowerBounds == null ? null : convertToReadable(field, lowerBounds.get(id)),
+              upperBounds == null ? null : convertToReadable(field, upperBounds.get(id)));
+      metricsStruct.put(entry.getValue(), struct);
+    }
+    return metricsStruct;
+  }
+
+  public static String convertToReadable(Types.NestedField field, ByteBuffer 
value) {
+    if (field == null || value == null) {
+      return null;
+    }
+    try {
+      return Transforms.identity(field.type())
+          .toHumanString(Conversions.fromByteBuffer(field.type(), value));
+    } catch (Exception e) {
+      LOG.warn("Error converting metric to readable form", e);
+      return null;
+    }
+  }
+
+  public static class ReadableMetricsStruct implements StructLike {
+
+    private final Long columnSize;
+    private final Long valueCount;
+    private final Long nullValueCount;
+    private final Long nanValueCount;
+    private final String lowerBound;
+    private final String upperBound;
+
+    public ReadableMetricsStruct(
+        Long columnSize,
+        Long valueCount,
+        Long nullValueCount,
+        Long nanValueCount,
+        String lowerBound,
+        String upperBound) {
+      this.columnSize = columnSize;
+      this.valueCount = valueCount;
+      this.nullValueCount = nullValueCount;
+      this.nanValueCount = nanValueCount;
+      this.lowerBound = lowerBound;
+      this.upperBound = upperBound;
+    }
+
+    @Override
+    public int size() {
+      return 6;
+    }
+
+    @Override
+    public <T> T get(int pos, Class<T> javaClass) {
+      Object value;
+      switch (pos) {

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to