aokolnychyi commented on a change in pull request #512: Extend RewriteManifests 
with a way to add/delete manifests
URL: https://github.com/apache/incubator-iceberg/pull/512#discussion_r331814616
 
 

 ##########
 File path: core/src/main/java/org/apache/iceberg/BaseRewriteManifests.java
 ##########
 @@ -85,45 +90,69 @@ protected String operation() {
 
   @Override
   public RewriteManifests set(String property, String value) {
-    summaryProps.put(property, value);
+    summaryBuilder.set(property, value);
     return this;
   }
 
   @Override
   protected Map<String, String> summary() {
-    Map<String, String> result = new HashMap<>();
-    result.putAll(summaryProps);
-    result.put(KEPT_CNT, Integer.toString(keptManifests.size()));
-    result.put(NEW_CNT, Integer.toString(newManifests.size()));
-    result.put(REPLACED_CNT, Integer.toString(replacedManifests.size()));
-    result.put(ENTRY_CNT, Long.toString(entryCount.get()));
-    return result;
+    summaryBuilder.set(KEPT_CNT, Integer.toString(keptManifests.size()));
+    summaryBuilder.set(NEW_CNT, Integer.toString(newManifests.size()));
+    summaryBuilder.set(REPLACED_CNT, 
Integer.toString(replacedManifests.size()));
+    summaryBuilder.set(ENTRY_CNT, Long.toString(entryCount.get()));
+    return summaryBuilder.build();
   }
 
   @Override
   public RewriteManifests clusterBy(Function<DataFile, Object> func) {
+    Preconditions.checkState(!isDirectReplacement, INVALID_USAGE_ERR);
     this.clusterByFunc = func;
     return this;
   }
 
   @Override
   public RewriteManifests rewriteIf(Predicate<ManifestFile> pred) {
+    Preconditions.checkState(!isDirectReplacement, INVALID_USAGE_ERR);
     this.predicate = pred;
     return this;
   }
 
   @Override
-  public List<ManifestFile> apply(TableMetadata base) {
-    Preconditions.checkNotNull(clusterByFunc, "clusterBy function cannot be 
null");
+  public RewriteManifests deleteManifest(ManifestFile manifest) {
+    Preconditions.checkState(clusterByFunc == null && predicate == null, 
INVALID_USAGE_ERR);
+    isDirectReplacement = true;
+    replacedManifests.add(manifest);
+    return this;
+  }
+
+  @Override
+  public RewriteManifests addManifest(ManifestFile manifest) {
+    Preconditions.checkState(clusterByFunc == null && predicate == null, 
INVALID_USAGE_ERR);
+    isDirectReplacement = true;
+    // the manifest must be rewritten with this update's snapshot ID
+    try (ManifestReader reader = 
ManifestReader.read(ops.io().newInputFile(manifest.path()), 
ops.current()::spec)) {
+      OutputFile newManifestPath = 
manifestPath(manifestSuffix.getAndIncrement());
+      Set<ManifestEntry.Status> allowedStatuses = 
Sets.newHashSet(ManifestEntry.Status.EXISTING);
+      newManifests.add(ManifestWriter.copyManifest(
+          reader, newManifestPath, snapshotId(), summaryBuilder, 
allowedStatuses));
 
 Review comment:
   Similarly to `BaseRewriteManifests`, our utility reads only live entries via 
`liveEntries` in `FilteredManifests` and produces staged manifests that contain 
entries with status `EXISTING`. `ManifestWriter$copyManifest` is changed to 
work for arbitrary use cases. In this particular one, it won't report any files 
as added or removed because all manifest entries have one status: `EXISTING` 
(validated in `addManifest`).

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@iceberg.apache.org
For additional commands, e-mail: issues-h...@iceberg.apache.org

Reply via email to