dramaticlly commented on code in PR #13837:
URL: https://github.com/apache/iceberg/pull/13837#discussion_r2291811651


##########
spark/v4.0/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteTablePathProcedure.java:
##########
@@ -171,8 +177,79 @@ public void testProcedureWithInvalidInput() {
             "Cannot find provided version file %s in metadata log.", 
"v11.metadata.json");
   }
 
+  @TestTemplate
+  public void testRewriteTablePathWithSkipFileList() {
+    String location = targetTableDir.toFile().toURI().toString();
+    Table table = validationCatalog.loadTable(tableIdent);
+    String metadataJson = TableUtil.metadataFileLocation(table);
+
+    List<Object[]> result =
+        sql(
+            "CALL %s.system.rewrite_table_path(table => '%s', source_prefix => 
'%s', target_prefix => '%s', skip_file_list => true)",
+            catalogName, tableIdent, table.location(), location);
+    assertThat(result).hasSize(1);
+    assertThat(result.get(0)[0])
+        .as("Should return correct latest version")
+        .isEqualTo(RewriteTablePathUtil.fileName(metadataJson));
+    assertThat(result.get(0)[1]).as("Should return 
empty").asString().isEqualTo("");
+  }
+
   private void checkFileListLocationCount(String fileListLocation, long 
expectedFileCount) {
     long fileCount = 
spark.read().format("text").load(fileListLocation).count();
     assertThat(fileCount).isEqualTo(expectedFileCount);
   }
+
+  @TestTemplate
+  public void testRewriteTablePathWithManifestAndDeleteCounts() throws 
IOException {
+
+    sql("INSERT INTO %s VALUES (1, 'a')", tableName);
+    sql("INSERT INTO %s VALUES (2, 'b')", tableName);
+    sql("INSERT INTO %s VALUES (3, 'c')", tableName);
+
+    Table table = validationCatalog.loadTable(tableIdent);
+    List<Pair<CharSequence, Long>> deletes =
+        Lists.newArrayList(
+            Pair.of(
+                
table.currentSnapshot().addedDataFiles(table.io()).iterator().next().location(),
+                0L));
+
+    File file = new File(removePrefix(table.location()) + 
"/data/deletes.parquet");
+    DeleteFile positionDeletes =
+        FileHelpers.writeDeleteFile(
+                table, table.io().newOutputFile(file.toURI().toString()), 
deletes)
+            .first();
+
+    table.newRowDelta().addDeletes(positionDeletes).commit();
+
+    sql("INSERT INTO %s VALUES (4, 'd')", tableName);
+
+    String targetLocation = targetTableDir.toFile().toURI().toString();
+    String stagingLocation = staging.toFile().toURI().toString();
+
+    List<Object[]> result =
+        sql(
+            "CALL %s.system.rewrite_table_path("
+                + "table => '%s', "
+                + "source_prefix => '%s', "
+                + "target_prefix => '%s', "
+                + "staging_location => '%s')",
+            catalogName, tableIdent, table.location(), targetLocation, 
stagingLocation);
+
+    assertThat(result).hasSize(1);
+    Object[] row = result.get(0);
+
+    int rewrittenManifestFilesCount = ((Number) row[2]).intValue();
+    int rewrittenDeleteFilesCount = ((Number) row[3]).intValue();
+
+    assertThat(rewrittenDeleteFilesCount)
+        .as("Should rewrite at least one delete file")
+        .isGreaterThan(0);
+    assertThat(rewrittenManifestFilesCount)
+        .as("Should rewrite at least one manifest file")
+        .isGreaterThan(0);

Review Comment:
   Can we assert equality on those rewritten file counts? This would ensure the 
procedure output reports the numbers correctly.



##########
spark/v4.0/spark/src/main/java/org/apache/iceberg/spark/actions/RewriteTablePathSparkAction.java:
##########
@@ -304,12 +307,29 @@ private String rebuildMetadata() {
             .collect(Collectors.toSet());
     rewritePositionDeletes(deleteFiles);
 
+    ImmutableRewriteTablePath.Result.Builder builder =
+        ImmutableRewriteTablePath.Result.builder()
+            .stagingLocation(stagingDir)
+            .rewrittenDeleteFilesCount(deleteFiles.size())
+            .rewrittenManifestFilesCount(metaFiles.size())
+            .latestVersion(RewriteTablePathUtil.fileName(endVersionName));
+
+    // skip file list
+    if (skipFileList) {
+      return builder.build();
+    }
+
     Set<Pair<String, String>> copyPlan = Sets.newHashSet();
     copyPlan.addAll(rewriteVersionResult.copyPlan());
     copyPlan.addAll(rewriteManifestListResult.copyPlan());
     copyPlan.addAll(rewriteManifestResult.copyPlan());
+    String fileListLocation = saveFileList(copyPlan);
 
-    return saveFileList(copyPlan);
+    return builder
+        .stagingLocation(stagingDir)
+        .fileListLocation(fileListLocation)

Review Comment:
   @nastra I am curious: does the API contract allow the String-typed fileListLocation 
to be null? I assume a Result built without fileListLocation (line 319) will 
have its value set to null.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to