manojpec commented on a change in pull request #4092:
URL: https://github.com/apache/hudi/pull/4092#discussion_r757685984



##########
File path: hudi-cli/src/main/java/org/apache/hudi/cli/commands/MetadataCommand.java
##########
@@ -152,72 +163,192 @@ public String stats() throws IOException {
         config, HoodieCLI.basePath, "/tmp");
     Map<String, String> stats = metadata.stats();
 
-    StringBuffer out = new StringBuffer("\n");
-    out.append(String.format("Base path: %s\n", 
getMetadataTableBasePath(HoodieCLI.basePath)));
+    final List<Comparable[]> rows = new ArrayList<>();
     for (Map.Entry<String, String> entry : stats.entrySet()) {
-      out.append(String.format("%s: %s\n", entry.getKey(), entry.getValue()));
+      Comparable[] row = new Comparable[2];
+      row[0] = entry.getKey();
+      row[1] = entry.getValue();
+      rows.add(row);
     }
 
-    return out.toString();
+    TableHeader header = new TableHeader()
+        .addTableHeaderField("stat key")
+        .addTableHeaderField("stat value");
+    return HoodiePrintHelper.print(header, new HashMap<>(), "",
+        false, Integer.MAX_VALUE, false, rows);
   }
 
-  @CliCommand(value = "metadata list-partitions", help = "Print a list of all 
partitions from the metadata")
+  @CliCommand(value = "metadata list-partitions", help = "List all partitions 
from metadata")
   public String listPartitions() throws IOException {
     HoodieCLI.getTableMetaClient();
     initJavaSparkContext();
     HoodieMetadataConfig config = 
HoodieMetadataConfig.newBuilder().enable(true).build();
     HoodieBackedTableMetadata metadata = new HoodieBackedTableMetadata(new 
HoodieSparkEngineContext(jsc), config,
         HoodieCLI.basePath, "/tmp");
 
-    StringBuffer out = new StringBuffer("\n");
     if (!metadata.enabled()) {
-      out.append("=== Metadata Table not initilized. Using file listing to get 
list of partitions. ===\n\n");
+      return "[ERROR] Metadata Table not enabled/initialized\n\n";
     }
 
-    long t1 = System.currentTimeMillis();
+    HoodieTimer timer = new HoodieTimer().startTimer();
     List<String> partitions = metadata.getAllPartitionPaths();
-    long t2 = System.currentTimeMillis();
-
-    int[] count = {0};
-    partitions.stream().sorted((p1, p2) -> p2.compareTo(p1)).forEach(p -> {
-      out.append(p);
-      if (++count[0] % 15 == 0) {
-        out.append("\n");
-      } else {
-        out.append(", ");
-      }
+    LOG.debug("Took " + timer.endTimer() + " ms");
+
+    final List<Comparable[]> rows = new ArrayList<>();
+    partitions.stream().sorted(Comparator.reverseOrder()).forEach(p -> {
+      Comparable[] row = new Comparable[1];
+      row[0] = p;
+      rows.add(row);
+      LOG.debug(">> partition " + p);
     });
 
-    out.append(String.format("\n\n=== List of partitions retrieved in %.2fsec 
===", (t2 - t1) / 1000.0));
-
-    return out.toString();
+    TableHeader header = new TableHeader().addTableHeaderField("partition");
+    return HoodiePrintHelper.print(header, new HashMap<>(), "",
+        false, Integer.MAX_VALUE, false, rows);
   }
 
   @CliCommand(value = "metadata list-files", help = "Print a list of all files 
in a partition from the metadata")
   public String listFiles(
-      @CliOption(key = {"partition"}, help = "Name of the partition to list 
files", mandatory = true)
-      final String partition) throws IOException {
+      @CliOption(key = {"partition"}, help = "Name of the partition to list 
files", mandatory = true) final String partition) throws IOException {
     HoodieCLI.getTableMetaClient();
     HoodieMetadataConfig config = 
HoodieMetadataConfig.newBuilder().enable(true).build();
-    HoodieBackedTableMetadata metaReader = new HoodieBackedTableMetadata(new 
HoodieLocalEngineContext(HoodieCLI.conf), config, HoodieCLI.basePath, "/tmp");
+    HoodieBackedTableMetadata metaReader = new HoodieBackedTableMetadata(
+        new HoodieLocalEngineContext(HoodieCLI.conf), config, 
HoodieCLI.basePath, "/tmp");
 
-    StringBuffer out = new StringBuffer("\n");
     if (!metaReader.enabled()) {
-      out.append("=== Metadata Table not initialized. Using file listing to 
get list of files in partition. ===\n\n");
+      return "[ERROR] Metadata Table not enabled/initialized\n\n";
     }
 
-    long t1 = System.currentTimeMillis();
+    HoodieTimer timer = new HoodieTimer().startTimer();
     FileStatus[] statuses = metaReader.getAllFilesInPartition(new 
Path(HoodieCLI.basePath, partition));
-    long t2 = System.currentTimeMillis();
+    LOG.debug("Took " + timer.endTimer() + " ms");
 
-    Arrays.stream(statuses).sorted((p1, p2) -> 
p2.getPath().getName().compareTo(p1.getPath().getName())).forEach(p -> {
-      out.append("\t" + p.getPath().getName());
-      out.append("\n");
+    final List<Comparable[]> rows = new ArrayList<>();
+    Arrays.stream(statuses).sorted((p1, p2) -> 
p2.getPath().getName().compareTo(p1.getPath().getName())).forEach(f -> {
+      Comparable[] row = new Comparable[1];
+      row[0] = f;
+      rows.add(row);
     });
 
-    out.append(String.format("\n=== Files in partition retrieved in %.2fsec 
===", (t2 - t1) / 1000.0));
+    TableHeader header = new TableHeader().addTableHeaderField("file path");
+    return HoodiePrintHelper.print(header, new HashMap<>(), "",
+        false, Integer.MAX_VALUE, false, rows);
+  }
+
+  @CliCommand(value = "metadata validate-files", help = "Validate all files in 
all partitions from the metadata")
+  public String validateFiles(
+      @CliOption(key = {"verbose"}, help = "Print all file details", 
unspecifiedDefaultValue = "false") final boolean verbose) throws IOException {
+    HoodieCLI.getTableMetaClient();
+    HoodieMetadataConfig config = 
HoodieMetadataConfig.newBuilder().enable(true).build();
+    HoodieBackedTableMetadata metaReader = new HoodieBackedTableMetadata(
+        new HoodieLocalEngineContext(HoodieCLI.conf), config, 
HoodieCLI.basePath, "/tmp");
+
+    if (!metaReader.enabled()) {
+      return "[ERROR] Metadata Table not enabled/initialized\n\n";
+    }
 
-    return out.toString();
+    HoodieMetadataConfig fsConfig = 
HoodieMetadataConfig.newBuilder().enable(false).build();
+    HoodieBackedTableMetadata fsMetaReader = new HoodieBackedTableMetadata(
+        new HoodieLocalEngineContext(HoodieCLI.conf), fsConfig, 
HoodieCLI.basePath, "/tmp");
+
+    HoodieTimer timer = new HoodieTimer().startTimer();
+    List<String> metadataPartitions = metaReader.getAllPartitionPaths();
+    List<String> fsPartitions = fsMetaReader.getAllPartitionPaths();
+    Collections.sort(fsPartitions);
+    Collections.sort(metadataPartitions);
+
+    Set<String> allPartitions = new HashSet<>();
+    allPartitions.addAll(fsPartitions);
+    allPartitions.addAll(metadataPartitions);
+
+    LOG.info("All FS partitions count " + fsPartitions.size() + ", metadata 
partition count " + metadataPartitions.size());
+    LOG.info("Partitions equality " + fsPartitions.equals(metadataPartitions));

Review comment:
       Removed these; a later section now logs an error when the
counts don't match.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to