mukund-thakur commented on a change in pull request #2149: URL: https://github.com/apache/hadoop/pull/2149#discussion_r468608626
########## File path: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/DirMarkerTracker.java ########## @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; + +/** + * Tracks directory markers which have been reported in object listings. + * This is needed for auditing and cleanup, including during rename + * operations. + * <p></p> + * Designed to be used while scanning through the results of listObject + * calls, where we assume the results come in alphanumeric sort order + * and parent entries before children. + * <p></p> + * This lets us assume that we can identify all leaf markers as those + * markers which were added to the set of leaf markers and not subsequently + * removed as child entries were discovered. + * <p></p> + * To avoid scanning data structures excessively, the path of the parent + * directory of the last file added is cached. 
This allows for a + * quick bailout when many children of the same directory are + * returned in a listing. + */ +public class DirMarkerTracker { + + private static final Logger LOG = + LoggerFactory.getLogger(DirMarkerTracker.class); + + /** + * all leaf markers. + */ + private final Map<Path, Marker> leafMarkers + = new TreeMap<>(); + + /** + * all surplus markers. + */ + private final Map<Path, Marker> surplusMarkers + = new TreeMap<>(); + + private final Path basePath; + + /** + * last parent directory checked. + */ + private Path lastDirChecked; + + /** + * Count of scans; used for test assertions. + */ + private int scanCount; + + /** + * How many files were found. + */ + private int filesFound; + + /** + * How many markers were found. + */ + private int markersFound; + + /** + * How many objects of any kind were found? + */ + private int objectsFound; + + /** + * Construct. + * Base path is currently only used for information rather than validating + * paths supplied in other mathods. + * @param basePath base path of track + */ + public DirMarkerTracker(final Path basePath) { + this.basePath = basePath; + } + + /** + * Get the base path of the tracker. + * @return the path + */ + public Path getBasePath() { + return basePath; + } + + /** + * A marker has been found; this may or may not be a leaf. + * Trigger a move of all markers above it into the surplus map. + * @param path marker path + * @param key object key + * @param source listing source + * @return the surplus markers found. + */ + public List<Marker> markerFound(Path path, + final String key, + final S3ALocatedFileStatus source) { + markersFound++; + leafMarkers.put(path, new Marker(path, key, source)); + return pathFound(path, key, source); + } + + /** + * A file has been found. Trigger a move of all + * markers above it into the surplus map. + * @param path marker path + * @param key object key + * @param source listing source + * @return the surplus markers found. 
+ */ + public List<Marker> fileFound(Path path, + final String key, + final S3ALocatedFileStatus source) { + filesFound++; + return pathFound(path, key, source); + } + + /** + * A path has been found. Trigger a move of all + * markers above it into the surplus map. + * @param path marker path + * @param key object key + * @param source listing source + * @return the surplus markers found. + */ + private List<Marker> pathFound(Path path, + final String key, + final S3ALocatedFileStatus source) { Review comment: Even if we need all three we could just pass the Marker() instance? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
