aokolnychyi commented on a change in pull request #2503:
URL: https://github.com/apache/iceberg/pull/2503#discussion_r627673224



##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseExpireSnapshotsSparkAction.java
##########
@@ -181,10 +184,29 @@ public BaseExpireSnapshotsSparkAction 
deleteWith(Consumer<String> newDeleteFunc)
 
   @Override
   public ExpireSnapshots.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", 
"EXPIRE-SNAPSHOTS");
+    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", getDescription());
     return withJobGroupInfo(info, this::doExecute);
   }
 
+  private String getDescription() {

Review comment:
       Iceberg usually tries to avoid `get` prefix. What about calling it 
`jobDesc`? That's short and descriptive enough.

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseExpireSnapshotsSparkAction.java
##########
@@ -181,10 +184,29 @@ public BaseExpireSnapshotsSparkAction 
deleteWith(Consumer<String> newDeleteFunc)
 
   @Override
   public ExpireSnapshots.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", 
"EXPIRE-SNAPSHOTS");
+    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", getDescription());
     return withJobGroupInfo(info, this::doExecute);
   }
 
+  private String getDescription() {
+    List<String> msgs = new ArrayList<>();
+    if (expireOlderThanValue != null) {
+      msgs.add("older_than=" + expireOlderThanValue);
+    }
+    if (retainLastValue != null) {
+      msgs.add("retain_last=" + retainLastValue);
+    }
+    if (!expiredSnapshotIds.isEmpty()) {
+      Long first = expiredSnapshotIds.stream().findFirst().get();
+      if (expiredSnapshotIds.size() > 1) {
+        msgs.add(String.format("snapshot_ids: %s(%s more...)", first, 
expiredSnapshotIds.size() - 1));

Review comment:
       nit: let's add a space before `(`

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseExpireSnapshotsSparkAction.java
##########
@@ -181,10 +184,29 @@ public BaseExpireSnapshotsSparkAction 
deleteWith(Consumer<String> newDeleteFunc)
 
   @Override
   public ExpireSnapshots.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", 
"EXPIRE-SNAPSHOTS");
+    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", getDescription());
     return withJobGroupInfo(info, this::doExecute);
   }
 
+  private String getDescription() {
+    List<String> msgs = new ArrayList<>();

Review comment:
       What about calling it options and using Lists?
   
   ```
   List<String> options = Lists.newArrayList();
   ```

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseRemoveOrphanFilesSparkAction.java
##########
@@ -140,10 +142,19 @@ public BaseRemoveOrphanFilesSparkAction 
deleteWith(Consumer<String> newDeleteFun
 
   @Override
   public RemoveOrphanFiles.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("REMOVE-ORPHAN-FILES", 
"REMOVE-ORPHAN-FILES");
+    JobGroupInfo info = newJobGroupInfo("REMOVE-ORPHAN-FILES", 
getDescription());
     return withJobGroupInfo(info, this::doExecute);
   }
 
+  private String getDescription() {
+    List<String> msg = new ArrayList<>();
+    msg.add("older_than=" + olderThanTimestamp);
+    if (location != null) {
+      msg.add("location=" + location);
+    }
+    return String.format("Removing orphan files(%s) from %s", 
StringUtils.join(msg, ","), table.name());

Review comment:
       We also need to add dry_run to the description.

##########
File path: 
spark3/src/main/java/org/apache/iceberg/spark/actions/BaseSnapshotTableSparkAction.java
##########
@@ -109,7 +109,8 @@ public SnapshotTable tableProperty(String property, String 
value) {
 
   @Override
   public SnapshotTable.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("SNAPSHOT-TABLE", "SNAPSHOT-TABLE");
+    JobGroupInfo info = newJobGroupInfo("SNAPSHOT-TABLE",
+        String.format("Snapshotting table %s(location=%s)", 
sourceTableIdent().toString(), destTableLocation));

Review comment:
       I think it is important to show the dest table identifier. Also, the 
location can be null if not provided. Since we are not showing table props and 
the location can be long, I'll be fine just saying this:
   
   ```
   Snapshotting table %s as %s
   ```

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseExpireSnapshotsSparkAction.java
##########
@@ -181,10 +184,29 @@ public BaseExpireSnapshotsSparkAction 
deleteWith(Consumer<String> newDeleteFunc)
 
   @Override
   public ExpireSnapshots.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", 
"EXPIRE-SNAPSHOTS");
+    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", getDescription());
     return withJobGroupInfo(info, this::doExecute);
   }
 
+  private String getDescription() {
+    List<String> msgs = new ArrayList<>();
+    if (expireOlderThanValue != null) {

Review comment:
       Can we add empty lines between these blocks?
   
   ```
       List<String> options = ...
   
       if (expireOlderThanValue != null) {
         ...
       }
   
       if (retainLastValue != null) {
         ....
       }
   
       if (!expiredSnapshotIds.isEmpty()) {
         ...
       }
   
       return ...
   ```

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseRemoveOrphanFilesSparkAction.java
##########
@@ -140,10 +142,19 @@ public BaseRemoveOrphanFilesSparkAction 
deleteWith(Consumer<String> newDeleteFun
 
   @Override
   public RemoveOrphanFiles.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("REMOVE-ORPHAN-FILES", 
"REMOVE-ORPHAN-FILES");
+    JobGroupInfo info = newJobGroupInfo("REMOVE-ORPHAN-FILES", 
getDescription());
     return withJobGroupInfo(info, this::doExecute);
   }
 
+  private String getDescription() {

Review comment:
       Same here, jobDesc, List<String> options, extra spaces, JOINER

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseExpireSnapshotsSparkAction.java
##########
@@ -19,11 +19,14 @@
 
 package org.apache.iceberg.spark.actions;
 
+import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Set;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.Consumer;
+import org.apache.commons.lang3.StringUtils;

Review comment:
       I think Iceberg mostly uses `Joiner` from Guava for this purpose.
   
   ```
   private static final Joiner COMMA = Joiner.on(",");
   
   COMMA.join(options)
   ```

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteManifestsSparkAction.java
##########
@@ -142,7 +142,8 @@ public RewriteManifests stagingLocation(String 
newStagingLocation) {
 
   @Override
   public RewriteManifests.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("REWRITE-MANIFESTS", 
"REWRITE-MANIFESTS");
+    JobGroupInfo info = newJobGroupInfo("REWRITE-MANIFESTS",
+        String.format("Rewriting manifests(staging location=%s) of %s", 
stagingLocation, table.name()));

Review comment:
       Let's add space before `(`

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseRewriteManifestsSparkAction.java
##########
@@ -142,7 +142,8 @@ public RewriteManifests stagingLocation(String 
newStagingLocation) {
 
   @Override
   public RewriteManifests.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("REWRITE-MANIFESTS", 
"REWRITE-MANIFESTS");
+    JobGroupInfo info = newJobGroupInfo("REWRITE-MANIFESTS",
+        String.format("Rewriting manifests(staging location=%s) of %s", 
stagingLocation, table.name()));

Review comment:
       Can we move this into a separate var and call it `desc` like below?

##########
File path: 
spark/src/main/java/org/apache/iceberg/spark/actions/BaseExpireSnapshotsSparkAction.java
##########
@@ -181,10 +184,29 @@ public BaseExpireSnapshotsSparkAction 
deleteWith(Consumer<String> newDeleteFunc)
 
   @Override
   public ExpireSnapshots.Result execute() {
-    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", 
"EXPIRE-SNAPSHOTS");
+    JobGroupInfo info = newJobGroupInfo("EXPIRE-SNAPSHOTS", getDescription());
     return withJobGroupInfo(info, this::doExecute);
   }
 
+  private String getDescription() {
+    List<String> msgs = new ArrayList<>();
+    if (expireOlderThanValue != null) {
+      msgs.add("older_than=" + expireOlderThanValue);
+    }
+    if (retainLastValue != null) {
+      msgs.add("retain_last=" + retainLastValue);
+    }
+    if (!expiredSnapshotIds.isEmpty()) {
+      Long first = expiredSnapshotIds.stream().findFirst().get();
+      if (expiredSnapshotIds.size() > 1) {
+        msgs.add(String.format("snapshot_ids: %s(%s more...)", first, 
expiredSnapshotIds.size() - 1));
+      } else {
+        msgs.add(String.format("snapshot_id: %s", first));
+      }
+    }
+    return String.format("Expiring snapshots(%s) in %s", 
StringUtils.join(msgs, ","), table.name());

Review comment:
       Can we add a space before `(`?
   
   ```
   Expiring snapshots (%s) in %s
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to