kokila-19 commented on code in PR #5724: URL: https://github.com/apache/hive/pull/5724#discussion_r2273026213
##########
iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java:
##########
@@ -590,6 +600,33 @@ public static TransformSpec getTransformSpec(Table table, String transformName,
     return spec;
   }
 
+  public static <T> List<T> readColStats(Table table, Long snapshotId, Predicate<BlobMetadata> filter) {
+    List<T> colStats = Lists.newArrayList();
+
+    Path statsPath = IcebergTableUtil.getColStatsPath(table, snapshotId);
+    if (statsPath == null) {
+      return colStats;
+    }
+    try (PuffinReader reader = Puffin.read(table.io().newInputFile(statsPath.toString())).build()) {
+      List<BlobMetadata> blobMetadata = reader.fileMetadata().blobs();
+
+      if (filter != null) {
+        blobMetadata = blobMetadata.stream().filter(filter)
+            .toList();
+      }
+      Iterator<ByteBuffer> it = Iterables.transform(reader.readAll(blobMetadata), Pair::second).iterator();
+      LOG.info("Using col stats from : {}", statsPath);
+
+      while (it.hasNext()) {
+        byte[] byteBuffer = ByteBuffers.toByteArray(it.next());
+        colStats.add(SerializationUtils.deserialize(byteBuffer));
+      }
+    } catch (Exception e) {
+      LOG.warn(" Unable to read col stats: ", e);

Review Comment:
   Nit: There is an extra space at the start of the log message.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org