deniskuzZ commented on a change in pull request #1087:
URL: https://github.com/apache/hive/pull/1087#discussion_r447033768



##########
File path: 
standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStoreChecker.java
##########
@@ -429,6 +451,75 @@ void findUnknownPartitions(Table table, Set<Path> 
partPaths,
     LOG.debug("Number of partitions not in metastore : " + 
result.getPartitionsNotInMs().size());
   }
 
+  /**
+   * Calculate the maximum seen writeId from the acid directory structure
+   * @param partPath Path of the partition directory
+   * @param res Partition result to write the max ids
+   * @throws IOException ex
+   */
+  private void setMaxTxnAndWriteIdFromPartition(Path partPath, 
CheckResult.PartitionResult res) throws IOException {
+    FileSystem fs = partPath.getFileSystem(conf);
+    FileStatus[] deltaOrBaseFiles = fs.listStatus(partPath, 
HIDDEN_FILES_PATH_FILTER);
+
+    // Read the writeIds from every base and delta directory and find the max
+    long maxWriteId = 0L;
+    long maxVisibilityId = 0L;
+    for(FileStatus fileStatus : deltaOrBaseFiles) {
+      if (!fileStatus.isDirectory()) {
+        continue;
+      }
+      long writeId = 0L;
+      long visibilityId = 0L;
+      String folder = fileStatus.getPath().getName();
+      if (folder.startsWith(BASE_PREFIX)) {
+        visibilityId = getVisibilityTxnId(folder);
+        if (visibilityId > 0) {
+          folder = removeVisibilityTxnId(folder);
+        }
+        writeId = Long.parseLong(folder.substring(BASE_PREFIX.length()));
+      } else if (folder.startsWith(DELTA_PREFIX) || 
folder.startsWith(DELETE_DELTA_PREFIX)) {
+        // See AcidUtils.parseDelta
+        visibilityId = getVisibilityTxnId(folder);
+        if (visibilityId > 0) {
+          folder = removeVisibilityTxnId(folder);
+        }
+        boolean isDeleteDelta = folder.startsWith(DELETE_DELTA_PREFIX);
+        String rest = folder.substring((isDeleteDelta ? DELETE_DELTA_PREFIX : 
DELTA_PREFIX).length());
+        int split = rest.indexOf('_');
+        //split2 may be -1 if no statementId
+        int split2 = rest.indexOf('_', split + 1);
+        // We always want the second part (it is either the same or greater if 
it is a compacted delta)
+        writeId = split2 == -1 ? Long.parseLong(rest.substring(split + 1)) : 
Long
+            .parseLong(rest.substring(split + 1, split2));
+      }
+      if (writeId > maxWriteId) {
+        maxWriteId = writeId;
+      }
+      if (visibilityId > maxVisibilityId) {
+        maxVisibilityId = visibilityId;
+      }
+    }
+    LOG.debug("Max writeId {}, max txnId {} found in partition {}", 
maxWriteId, maxVisibilityId,
+        partPath.toUri().toString());
+    res.setMaxWriteId(maxWriteId);
+    res.setMaxTxnId(maxVisibilityId);
+  }
+  private long getVisibilityTxnId(String folder) {
+    int idxOfVis = folder.indexOf(VISIBILITY_PREFIX);

Review comment:
       Why not use a regex with pattern matching? Then removeVisibilityTxnId 
probably wouldn't even be needed.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to