aokolnychyi commented on a change in pull request #512: Extend RewriteManifests with a way to add/delete manifests URL: https://github.com/apache/incubator-iceberg/pull/512#discussion_r331814382
########## File path: core/src/main/java/org/apache/iceberg/BaseRewriteManifests.java ########## @@ -85,45 +90,69 @@ protected String operation() { @Override public RewriteManifests set(String property, String value) { - summaryProps.put(property, value); + summaryBuilder.set(property, value); return this; } @Override protected Map<String, String> summary() { - Map<String, String> result = new HashMap<>(); - result.putAll(summaryProps); - result.put(KEPT_CNT, Integer.toString(keptManifests.size())); - result.put(NEW_CNT, Integer.toString(newManifests.size())); - result.put(REPLACED_CNT, Integer.toString(replacedManifests.size())); - result.put(ENTRY_CNT, Long.toString(entryCount.get())); - return result; + summaryBuilder.set(KEPT_CNT, Integer.toString(keptManifests.size())); + summaryBuilder.set(NEW_CNT, Integer.toString(newManifests.size())); + summaryBuilder.set(REPLACED_CNT, Integer.toString(replacedManifests.size())); + summaryBuilder.set(ENTRY_CNT, Long.toString(entryCount.get())); + return summaryBuilder.build(); } @Override public RewriteManifests clusterBy(Function<DataFile, Object> func) { + Preconditions.checkState(!isDirectReplacement, INVALID_USAGE_ERR); this.clusterByFunc = func; return this; } @Override public RewriteManifests rewriteIf(Predicate<ManifestFile> pred) { + Preconditions.checkState(!isDirectReplacement, INVALID_USAGE_ERR); this.predicate = pred; return this; } @Override - public List<ManifestFile> apply(TableMetadata base) { - Preconditions.checkNotNull(clusterByFunc, "clusterBy function cannot be null"); + public RewriteManifests deleteManifest(ManifestFile manifest) { + Preconditions.checkState(clusterByFunc == null && predicate == null, INVALID_USAGE_ERR); + isDirectReplacement = true; + replacedManifests.add(manifest); + return this; + } + + @Override + public RewriteManifests addManifest(ManifestFile manifest) { + Preconditions.checkState(clusterByFunc == null && predicate == null, INVALID_USAGE_ERR); + isDirectReplacement = true; + // the manifest must be rewritten with this update's snapshot ID + try (ManifestReader reader = ManifestReader.read(ops.io().newInputFile(manifest.path()), ops.current()::spec)) { + OutputFile newManifestPath = manifestPath(manifestSuffix.getAndIncrement()); + Set<ManifestEntry.Status> allowedStatuses = Sets.newHashSet(ManifestEntry.Status.EXISTING); Review comment: I followed the logic that we already have in `BaseRewriteManifests` and `MergingSnapshotProducer`: rewriting manifests produces a new snapshot where files that were present before are added with status `EXISTING` while keeping their original snapshot id. By restricting manifest entries to `EXISTING`, we validate that `RewriteManifests` is not used to add/delete files. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org