plusplusjiajia commented on code in PR #6503:
URL: https://github.com/apache/paimon/pull/6503#discussion_r2490321026
##########
paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergCommitCallback.java:
##########
@@ -1230,6 +1328,147 @@ private List<IcebergManifestFileMeta>
createDvManifestFileMetas(Snapshot snapsho
icebergDvEntries.iterator(), snapshotId,
IcebergManifestFileMeta.Content.DELETES);
}
+ //
-------------------------------------------------------------------------------------
+ // Snapshot Summary Computation
+ //
-------------------------------------------------------------------------------------
+
+ private static class SummaryMetrics {
+ long addedDataFiles;
+ long addedRecords;
+ long addedFilesSize;
+ long deletedDataFiles;
+ long deletedRecords;
+ long deletedFilesSize;
+ long changedPartitionCount;
+ long totalDataFiles;
+ long totalRecords;
+ long totalFilesSize;
+ long totalDeleteFiles;
+ long totalPositionDeletes;
+ long totalEqualityDeletes;
+ }
+
+ private IcebergSnapshotSummary computeSnapshotSummary(
+ String operation, Snapshot snapshot, SummaryMetrics metrics) {
+
+ IcebergSnapshotSummary summary = new IcebergSnapshotSummary(operation);
+
+ long addedDataFiles = Math.max(0, metrics.addedDataFiles);
+ long addedRecords = Math.max(0, metrics.addedRecords);
+ long addedFilesSize = Math.max(0, metrics.addedFilesSize);
+ long deletedDataFiles = Math.max(0, metrics.deletedDataFiles);
+ long deletedRecords = Math.max(0, metrics.deletedRecords);
+ long deletedFilesSize = Math.max(0, metrics.deletedFilesSize);
+ long changedPartitionCount = Math.max(0,
metrics.changedPartitionCount);
+ long totalRecords = Math.max(0, metrics.totalRecords);
+ long totalDataFiles = Math.max(0, metrics.totalDataFiles);
+ long totalFilesSize = Math.max(0, metrics.totalFilesSize);
+ long totalDeleteFiles = Math.max(0, metrics.totalDeleteFiles);
+ long totalPositionDeletes = Math.max(0, metrics.totalPositionDeletes);
+ long totalEqualityDeletes = Math.max(0, metrics.totalEqualityDeletes);
+
+ summary.put("added-data-files", Long.toString(addedDataFiles));
+ summary.put("added-records", Long.toString(addedRecords));
+ summary.put("added-files-size", Long.toString(addedFilesSize));
+ summary.put("deleted-data-files", Long.toString(deletedDataFiles));
+ summary.put("deleted-records", Long.toString(deletedRecords));
+ summary.put("deleted-files-size", Long.toString(deletedFilesSize));
+ summary.put("changed-partition-count",
Long.toString(changedPartitionCount));
+ summary.put("total-records", Long.toString(totalRecords));
+ summary.put("total-data-files", Long.toString(totalDataFiles));
+ summary.put("total-files-size", Long.toString(totalFilesSize));
+ summary.put("total-delete-files", Long.toString(totalDeleteFiles));
+ summary.put("total-position-deletes",
Long.toString(totalPositionDeletes));
+ summary.put("total-equality-deletes",
Long.toString(totalEqualityDeletes));
+
+ Map<String, String> properties = snapshot.properties();
+ if (properties != null) {
+ properties.forEach(
+ (key, value) -> {
+ if (value != null) {
+ summary.put(key, value);
+ }
+ });
+ }
+
+ return summary;
+ }
+
+ private long computeLiveDataFileCount(List<IcebergManifestFileMeta>
manifestMetas) {
+ return manifestMetas.stream()
+ .mapToLong(
+ meta ->
+ meta.addedFilesCount()
+ + meta.existingFilesCount()
+ - meta.deletedFilesCount())
+ .sum();
+ }
+
+ private long computeLiveRowCount(List<IcebergManifestFileMeta>
manifestMetas) {
+ return manifestMetas.stream()
+ .mapToLong(
+ meta ->
+ meta.addedRowsCount()
+ + meta.existingRowsCount()
+ - meta.deletedRowsCount())
+ .sum();
+ }
+
+ private long computeLiveDeleteFileCount(List<IcebergManifestFileMeta>
manifestMetas) {
+ return manifestMetas.stream()
+ .mapToLong(
+ meta ->
+ meta.addedFilesCount()
+ + meta.existingFilesCount()
+ - meta.deletedFilesCount())
+ .sum();
+ }
+
+ private long computeDeleteRowCount(List<IcebergManifestFileMeta>
manifestMetas) {
Review Comment:
`computeLiveRowCount` is now the same as `computeDeleteRowCount`; please check
them.
##########
paimon-core/src/main/java/org/apache/paimon/iceberg/IcebergCommitCallback.java:
##########
@@ -1230,6 +1328,147 @@ private List<IcebergManifestFileMeta>
createDvManifestFileMetas(Snapshot snapsho
icebergDvEntries.iterator(), snapshotId,
IcebergManifestFileMeta.Content.DELETES);
}
+ //
-------------------------------------------------------------------------------------
+ // Snapshot Summary Computation
+ //
-------------------------------------------------------------------------------------
+
+ private static class SummaryMetrics {
+ long addedDataFiles;
+ long addedRecords;
+ long addedFilesSize;
+ long deletedDataFiles;
+ long deletedRecords;
+ long deletedFilesSize;
+ long changedPartitionCount;
+ long totalDataFiles;
+ long totalRecords;
+ long totalFilesSize;
+ long totalDeleteFiles;
+ long totalPositionDeletes;
+ long totalEqualityDeletes;
+ }
+
+ private IcebergSnapshotSummary computeSnapshotSummary(
+ String operation, Snapshot snapshot, SummaryMetrics metrics) {
+
+ IcebergSnapshotSummary summary = new IcebergSnapshotSummary(operation);
+
+ long addedDataFiles = Math.max(0, metrics.addedDataFiles);
+ long addedRecords = Math.max(0, metrics.addedRecords);
+ long addedFilesSize = Math.max(0, metrics.addedFilesSize);
+ long deletedDataFiles = Math.max(0, metrics.deletedDataFiles);
+ long deletedRecords = Math.max(0, metrics.deletedRecords);
+ long deletedFilesSize = Math.max(0, metrics.deletedFilesSize);
+ long changedPartitionCount = Math.max(0,
metrics.changedPartitionCount);
+ long totalRecords = Math.max(0, metrics.totalRecords);
+ long totalDataFiles = Math.max(0, metrics.totalDataFiles);
+ long totalFilesSize = Math.max(0, metrics.totalFilesSize);
+ long totalDeleteFiles = Math.max(0, metrics.totalDeleteFiles);
+ long totalPositionDeletes = Math.max(0, metrics.totalPositionDeletes);
+ long totalEqualityDeletes = Math.max(0, metrics.totalEqualityDeletes);
+
+ summary.put("added-data-files", Long.toString(addedDataFiles));
Review Comment:
It would be better to define constants for these metric keys.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]