Repository: hadoop Updated Branches: refs/heads/HADOOP-13345 c85e13cb3 -> be03eb6a4
Revert "HADOOP-14144 s3guard: CLI diff non-empty after import on new table. Contributed by Sean Mackrory." Neglected to get clean Yetus run first. This reverts commit c85e13cb34b634d392d2edc51cddf000eff7b062. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/be03eb6a Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/be03eb6a Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/be03eb6a Branch: refs/heads/HADOOP-13345 Commit: be03eb6a4bb7a36ad8553efa476b096334eecf3a Parents: c85e13c Author: Aaron Fabbri <[email protected]> Authored: Thu Apr 6 16:56:58 2017 -0700 Committer: Aaron Fabbri <[email protected]> Committed: Thu Apr 6 16:56:58 2017 -0700 ---------------------------------------------------------------------- .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 93 +++++++------------- .../hadoop/fs/s3a/s3guard/TestS3GuardTool.java | 11 +-- 2 files changed, 36 insertions(+), 68 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/be03eb6a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index ea424d5..fef8e2d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -441,13 +441,11 @@ public abstract class S3GuardTool extends Configured implements Tool { /** * Recursively import every path under path - * @return number of items inserted into MetadataStore */ - private long importDir(FileStatus status) throws IOException { + private void importDir(FileStatus status) throws IOException { Preconditions.checkArgument(status.isDirectory()); RemoteIterator<LocatedFileStatus> it = s3a.listFilesAndDirectories(status.getPath(), true); - long items = 0; while (it.hasNext()) { LocatedFileStatus located = it.next(); @@ -465,9 +463,7 @@ public abstract class S3GuardTool extends Configured implements Tool { } putParentsIfNotPresent(child); ms.put(new PathMetadata(child)); - items++; } - return items; } @Override @@ -497,16 +493,13 @@ public abstract class S3GuardTool extends Configured implements Tool { initMetadataStore(false); - long items = 1; if (status.isFile()) { PathMetadata meta = new PathMetadata(status); ms.put(meta); } else { - items = importDir(status); + importDir(status); } - System.out.printf("Inserted %d items into Metadata Store\n", items); - return SUCCESS; } } @@ -561,55 +554,30 @@ public abstract class S3GuardTool extends Configured implements Tool { * @return the string of output. */ private static String formatFileStatus(FileStatus status) { - return String.format("%s%s%d%s%s", + return String.format("%s%s%s", status.isDirectory() ? "D" : "F", SEP, - status.getLen(), - SEP, status.getPath().toString()); } /** - * Compares metadata from 2 S3 FileStatus's to see if they differ - * @param thisOne - * @param thatOne - * @return true if the metadata is not identical - */ - private static boolean differ(FileStatus thisOne, FileStatus thatOne) { - Preconditions.checkArgument(!(thisOne == null && thatOne == null)); - return (thisOne == null || thatOne == null) || - (thisOne.getLen() != thatOne.getLen()) || - (thisOne.isDirectory() != thatOne.isDirectory()) || - (!thisOne.isDirectory() && - thisOne.getModificationTime() != thatOne.getModificationTime()); - } - - /** * Print difference, if any, between two file statuses to the output stream. * - * @param msStatus file status from metadata store. - * @param s3Status file status from S3. + * @param statusFromMS file status from metadata store. + * @param statusFromS3 file status from S3. * @param out output stream. */ - private static void printDiff(FileStatus msStatus, - FileStatus s3Status, + private static void printDiff(FileStatus statusFromMS, + FileStatus statusFromS3, PrintStream out) { - Preconditions.checkArgument(!(msStatus == null && s3Status == null)); - if (msStatus != null && s3Status != null) { - Preconditions.checkArgument( - msStatus.getPath().equals(s3Status.getPath()), - String.format("The path from metadata store and s3 are different:" + - " ms=%s s3=%s", msStatus.getPath(), s3Status.getPath())); - } - - if (differ(msStatus, s3Status)) { - if (s3Status != null) { - out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(s3Status)); - } - if (msStatus != null) { - out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(msStatus)); - } + Preconditions.checkArgument( + !(statusFromMS == null && statusFromS3 == null)); + if (statusFromMS == null) { + out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(statusFromS3)); + } else if (statusFromS3 == null) { + out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(statusFromMS)); } + // TODO: Do we need to compare the internal fields of two FileStatuses? } /** @@ -629,28 +597,32 @@ public abstract class S3GuardTool extends Configured implements Tool { */ private void compareDir(FileStatus msDir, FileStatus s3Dir, PrintStream out) throws IOException { - Preconditions.checkArgument(!(msDir == null && s3Dir == null)); + if (msDir == null && s3Dir == null) { + return; + } if (msDir != null && s3Dir != null) { Preconditions.checkArgument(msDir.getPath().equals(s3Dir.getPath()), String.format("The path from metadata store and s3 are different:" + - " ms=%s s3=%s", msDir.getPath(), s3Dir.getPath())); + " ms=%s s3=%s", msDir.getPath(), s3Dir.getPath())); } - Map<Path, FileStatus> s3Children = new HashMap<>(); + printDiff(msDir, s3Dir, out); + Map<Path, S3AFileStatus> s3Children = new HashMap<>(); if (s3Dir != null && s3Dir.isDirectory()) { for (FileStatus status : s3a.listStatus(s3Dir.getPath())) { - s3Children.put(status.getPath(), status); + Preconditions.checkState(status instanceof S3AFileStatus); + s3Children.put(status.getPath(), (S3AFileStatus) status); } } - Map<Path, FileStatus> msChildren = new HashMap<>(); + Map<Path, S3AFileStatus> msChildren = new HashMap<>(); if (msDir != null && msDir.isDirectory()) { DirListingMetadata dirMeta = ms.listChildren(msDir.getPath()); if (dirMeta != null) { for (PathMetadata meta : dirMeta.getListing()) { - FileStatus status = meta.getFileStatus(); + S3AFileStatus status = (S3AFileStatus) meta.getFileStatus(); msChildren.put(status.getPath(), status); } } @@ -660,12 +632,12 @@ public abstract class S3GuardTool extends Configured implements Tool { allPaths.addAll(msChildren.keySet()); for (Path path : allPaths) { - FileStatus s3Status = s3Children.get(path); - FileStatus msStatus = msChildren.get(path); - printDiff(msStatus, s3Status, out); - if ((s3Status != null && s3Status.isDirectory()) || + S3AFileStatus s3status = s3Children.get(path); + S3AFileStatus msStatus = msChildren.get(path); + printDiff(msStatus, s3status, out); + if ((s3status != null && s3status.isDirectory()) || (msStatus != null && msStatus.isDirectory())) { - compareDir(msStatus, s3Status, out); + compareDir(msStatus, s3status, out); } } out.flush(); @@ -678,7 +650,7 @@ public abstract class S3GuardTool extends Configured implements Tool { * @param out the output stream to display results. * @throws IOException */ - private void compareRoot(Path path, PrintStream out) throws IOException { + private void compare(Path path, PrintStream out) throws IOException { Path qualified = s3a.qualify(path); FileStatus s3Status = null; try { @@ -686,7 +658,8 @@ public abstract class S3GuardTool extends Configured implements Tool { } catch (FileNotFoundException e) { } PathMetadata meta = ms.get(qualified); - FileStatus msStatus = meta != null ? meta.getFileStatus() : null; + S3AFileStatus msStatus = meta != null ? + (S3AFileStatus) meta.getFileStatus() : null; compareDir(msStatus, s3Status, out); } @@ -714,7 +687,7 @@ public abstract class S3GuardTool extends Configured implements Tool { root = new Path(uri.getPath()); } root = s3a.qualify(root); - compareRoot(root, out); + compare(root, out); out.flush(); return SUCCESS; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/be03eb6a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java index 288daa0..8874511 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/TestS3GuardTool.java @@ -117,7 +117,6 @@ public class TestS3GuardTool extends S3GuardToolTestBase { Set<Path> actualOnS3 = new HashSet<>(); Set<Path> actualOnMS = new HashSet<>(); - boolean duplicates = false; try (ByteArrayInputStream in = new ByteArrayInputStream(buf.toByteArray())) { try (BufferedReader reader = @@ -126,15 +125,12 @@ public class TestS3GuardTool extends S3GuardToolTestBase { while ((line = reader.readLine()) != null) { String[] fields = line.split("\\s"); assertEquals("[" + line + "] does not have enough fields", - 4, fields.length); + 3, fields.length); String where = fields[0]; - Path path = new Path(fields[3]); if (Diff.S3_PREFIX.equals(where)) { - duplicates = duplicates || actualOnS3.contains(path); - actualOnS3.add(path); + actualOnS3.add(new Path(fields[2])); } else if (Diff.MS_PREFIX.equals(where)) { - duplicates = duplicates || actualOnMS.contains(path); - actualOnMS.add(path); + actualOnMS.add(new Path(fields[2])); } else { fail("Unknown prefix: " + where); } @@ -145,6 +141,5 @@ public class TestS3GuardTool extends S3GuardToolTestBase { assertEquals("Mismatched metadata store outputs: " + actualOut, filesOnMS, actualOnMS); assertEquals("Mismatched s3 outputs: " + actualOut, filesOnS3, actualOnS3); - assertFalse("Diff contained duplicates", duplicates); } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
