steveloughran commented on a change in pull request #1691: HADOOP-16424.
S3Guard fsck: Check internal consistency of the MetadataStore
URL: https://github.com/apache/hadoop/pull/1691#discussion_r349162244
##########
File path:
hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardFsck.java
##########
@@ -396,6 +413,234 @@ public Path getPath() {
}
}
+ /**
+ * Check the DynamoDB metadatastore internally for consistency.
+ * <pre>
+ * Tasks to do here:
+ * - find orphan entries (entries without a parent).
+ * - find if a file's parent is not a directory (so the parent is a file).
+ * - find entries where the parent is a tombstone.
+ * - warn: no lastUpdated field.
+ * </pre>
+ */
+ public List<ComparePair> checkDdbInternalConsistency (Path basePath)
+ throws IOException {
+ Preconditions.checkArgument(basePath.isAbsolute(), "path must be
absolute");
+
+ List<ComparePair> comparePairs = new ArrayList<>();
+ String rootStr = basePath.toString();
+ LOG.info("Root for internal consistency check: {}", rootStr);
+ StopWatch stopwatch = new StopWatch();
+ stopwatch.start();
+
+ final Table table = metadataStore.getTable();
+ final String username = metadataStore.getUsername();
+ DDBTree ddbTree = new DDBTree();
+
+ /*
+ * I. Root node construction
+ * - If the root node is the real bucket root, a node is constructed
instead of
+ * doing a query to the ddb because the bucket root is not stored.
+ * - If the root node is not a real bucket root then the entry is queried
from
+ * the ddb and constructed from the result.
+ */
+
+ DDBPathMetadata baseMeta;
+
+ if (!basePath.isRoot()) {
+ PrimaryKey rootKey = pathToKey(basePath);
+ final GetItemSpec spec = new GetItemSpec()
+ .withPrimaryKey(rootKey)
+ .withConsistentRead(true);
+ final Item baseItem = table.getItem(spec);
+ baseMeta = itemToPathMetadata(baseItem, username);
+
+ if (baseMeta == null) {
+ throw new FileNotFoundException(
+ "Base element metadata is null. " +
+ "This means the base path element is missing, or wrong path is
" +
Review comment:
nit: prefer "was passed" (past tense) in this exception message, i.e. "... or wrong path was passed"
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]