amogh-jahagirdar commented on code in PR #12593:
URL: https://github.com/apache/iceberg/pull/12593#discussion_r2011084383
##########
core/src/main/java/org/apache/iceberg/SnapshotProducer.java:
##########
@@ -283,32 +284,55 @@ public Snapshot apply() {
throw new RuntimeIOException(e, "Failed to write manifest list file");
}
+ Map<String, String> summary = summary();
+ String operation = operation();
+
Long addedRows = null;
- Long lastRowId = null;
- if (base.rowLineageEnabled()) {
- addedRows = calculateAddedRows(manifests);
- lastRowId = base.nextRowId();
+ Long firstRowId = null;
Review Comment:
Hm, should this still be called `lastRowId`?
##########
core/src/main/java/org/apache/iceberg/SnapshotProducer.java:
##########
@@ -283,32 +284,55 @@ public Snapshot apply() {
throw new RuntimeIOException(e, "Failed to write manifest list file");
}
+ Map<String, String> summary = summary();
+ String operation = operation();
+
Long addedRows = null;
- Long lastRowId = null;
- if (base.rowLineageEnabled()) {
- addedRows = calculateAddedRows(manifests);
- lastRowId = base.nextRowId();
+ Long firstRowId = null;
+ if (base.formatVersion() >= 3) {
+ addedRows = calculateAddedRows(operation, summary, manifests);
+ firstRowId = base.nextRowId();
}
return new BaseSnapshot(
sequenceNumber,
snapshotId(),
parentSnapshotId,
System.currentTimeMillis(),
- operation(),
- summary(base),
+ operation,
+ summaryWithTotals(base, summary),
base.currentSchemaId(),
manifestList.location(),
- lastRowId,
+ firstRowId,
addedRows);
}
- private Long calculateAddedRows(List<ManifestFile> manifests) {
+ private Long calculateAddedRows(
+ String operation, Map<String, String> summary, List<ManifestFile>
manifests) {
+ if (summary != null) {
+ long addedRecords =
+ PropertyUtil.propertyAsLong(summary,
SnapshotSummary.ADDED_RECORDS_PROP, 0L);
+ if (DataOperations.REPLACE.equals(operation)) {
+ long replacedRecords =
+ PropertyUtil.propertyAsLong(summary,
SnapshotSummary.DELETED_RECORDS_PROP, 0L);
+ // added may be less than replaced when records are already deleted by
delete files
+ Preconditions.checkArgument(
+ addedRecords <= replacedRecords,
+ "Invalid REPLACE operation: %s added records > %s replaced
records",
+ addedRecords,
+ replacedRecords);
+ return 0L;
+ }
+
+ return addedRecords;
+ }
+
return manifests.stream()
.filter(
manifest ->
manifest.snapshotId() == null
|| Objects.equals(manifest.snapshotId(), this.snapshotId))
+ .filter(manifest -> manifest.content() == ManifestContent.DATA)
Review Comment:
Good catch
##########
core/src/test/java/org/apache/iceberg/TestTableMetadata.java:
##########
@@ -232,8 +231,6 @@ public void testJsonConversion() throws Exception {
assertThat(metadata.statisticsFiles()).isEqualTo(statisticsFiles);
assertThat(metadata.partitionStatisticsFiles()).isEqualTo(partitionStatisticsFiles);
assertThat(metadata.refs()).isEqualTo(refs);
-
assertThat(metadata.rowLineageEnabled()).isEqualTo(expected.rowLineageEnabled());
- assertThat(metadata.nextRowId()).isEqualTo(expected.nextRowId());
Review Comment:
Yeah I think the `nextRowId` assertions should still be retained
##########
core/src/main/java/org/apache/iceberg/SnapshotProducer.java:
##########
@@ -283,32 +284,55 @@ public Snapshot apply() {
throw new RuntimeIOException(e, "Failed to write manifest list file");
}
+ Map<String, String> summary = summary();
+ String operation = operation();
+
Long addedRows = null;
- Long lastRowId = null;
- if (base.rowLineageEnabled()) {
- addedRows = calculateAddedRows(manifests);
- lastRowId = base.nextRowId();
+ Long firstRowId = null;
Review Comment:
Ah, I see. No, it really is the first row ID of the new snapshot.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]