aokolnychyi commented on a change in pull request #512: Extend RewriteManifests with a way to add/delete manifests URL: https://github.com/apache/incubator-iceberg/pull/512#discussion_r331814616
########## File path: core/src/main/java/org/apache/iceberg/BaseRewriteManifests.java ########## @@ -85,45 +90,69 @@ protected String operation() { @Override public RewriteManifests set(String property, String value) { - summaryProps.put(property, value); + summaryBuilder.set(property, value); return this; } @Override protected Map<String, String> summary() { - Map<String, String> result = new HashMap<>(); - result.putAll(summaryProps); - result.put(KEPT_CNT, Integer.toString(keptManifests.size())); - result.put(NEW_CNT, Integer.toString(newManifests.size())); - result.put(REPLACED_CNT, Integer.toString(replacedManifests.size())); - result.put(ENTRY_CNT, Long.toString(entryCount.get())); - return result; + summaryBuilder.set(KEPT_CNT, Integer.toString(keptManifests.size())); + summaryBuilder.set(NEW_CNT, Integer.toString(newManifests.size())); + summaryBuilder.set(REPLACED_CNT, Integer.toString(replacedManifests.size())); + summaryBuilder.set(ENTRY_CNT, Long.toString(entryCount.get())); + return summaryBuilder.build(); } @Override public RewriteManifests clusterBy(Function<DataFile, Object> func) { + Preconditions.checkState(!isDirectReplacement, INVALID_USAGE_ERR); this.clusterByFunc = func; return this; } @Override public RewriteManifests rewriteIf(Predicate<ManifestFile> pred) { + Preconditions.checkState(!isDirectReplacement, INVALID_USAGE_ERR); this.predicate = pred; return this; } @Override - public List<ManifestFile> apply(TableMetadata base) { - Preconditions.checkNotNull(clusterByFunc, "clusterBy function cannot be null"); + public RewriteManifests deleteManifest(ManifestFile manifest) { + Preconditions.checkState(clusterByFunc == null && predicate == null, INVALID_USAGE_ERR); + isDirectReplacement = true; + replacedManifests.add(manifest); + return this; + } + + @Override + public RewriteManifests addManifest(ManifestFile manifest) { + Preconditions.checkState(clusterByFunc == null && predicate == null, INVALID_USAGE_ERR); + isDirectReplacement = true; + // the manifest must be rewritten with this update's snapshot ID + try (ManifestReader reader = ManifestReader.read(ops.io().newInputFile(manifest.path()), ops.current()::spec)) { + OutputFile newManifestPath = manifestPath(manifestSuffix.getAndIncrement()); + Set<ManifestEntry.Status> allowedStatuses = Sets.newHashSet(ManifestEntry.Status.EXISTING); + newManifests.add(ManifestWriter.copyManifest( + reader, newManifestPath, snapshotId(), summaryBuilder, allowedStatuses)); Review comment: Similarly to `BaseRewriteManifests`, our utility reads only live entries via `liveEntries` in `FilteredManifests` and produces staged manifests that contain entries with status `EXISTING`. `ManifestWriter$copyManifest` is changed to work for arbitrary use cases. In this particular one, it won't report any files as added or removed because all manifest entries have one status: `EXISTING` (validated in `addManifest`). ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org For additional commands, e-mail: issues-h...@iceberg.apache.org