mackrorysd commented on a change in pull request #843: HADOOP-15183 S3Guard store becomes inconsistent after partial failure of rename
URL: https://github.com/apache/hadoop/pull/843#discussion_r292568060
##########
File path: hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/ProgressiveRenameTracker.java
##########

@@ -0,0 +1,245 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.s3guard;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.S3ObjectAttributes;
+import org.apache.hadoop.fs.s3a.impl.StoreContext;
+import org.apache.hadoop.util.DurationInfo;
+
+import static com.google.common.base.Preconditions.checkArgument;
+import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.addMoveAncestors;
+import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.addMoveDir;
+
+/**
+ * This rename tracker progressively updates the metadata store
+ * as it proceeds, during the parallelized copy operation.
+ * <p>
+ * Algorithm
+ * <ol>
+ *   <li>
+ *     As {@code RenameTracker.fileCopied()} callbacks
+ *     are raised, the metastore is updated with the new file entry.
+ *   </li>
+ *   <li>
+ *     Including parent entries, as appropriate.
+ *   </li>
+ *   <li>
+ *     All directories which have been created are tracked locally,
+ *     to avoid needing to read the store; this is a thread-safe structure.
+ *   </li>
+ *   <li>
+ *     The actual update is performed out of any synchronized block.
+ *   </li>
+ *   <li>
+ *     When deletes are executed, the store is also updated.
+ *   </li>
+ *   <li>
+ *     And at the completion of a successful rename, the source directory
+ *     is also removed.
+ *   </li>
+ * </ol>
+ * <pre>
+ *
+ * </pre>
+ */
+public class ProgressiveRenameTracker extends RenameTracker {
+
+  /**
+   * The collection of paths to delete; this is added as individual files
+   * are renamed.
+   * <p>
+   * The metastore is only updated with these entries after the DELETE
+   * call containing these paths succeeds.
+   * <p>
+   * If the DELETE fails; the filesystem will use
+   * {@code MultiObjectDeleteSupport} to remove all successfully deleted
+   * entries from the metastore.
+   */
+  private final Collection<Path> pathsToDelete = new HashSet<>();
+
+  /**
+   * The list of new entries to add.
+   */
+  private final List<PathMetadata> destMetas = new ArrayList<>();
+
+  public ProgressiveRenameTracker(
+      final StoreContext storeContext,
+      final MetadataStore metadataStore,
+      final Path sourceRoot,
+      final Path dest,
+      final BulkOperationState operationState) {
+    super("ProgressiveRenameTracker",
+        storeContext, metadataStore, sourceRoot, dest, operationState);
+  }
+
+  /**
+   * When a file is copied, any ancestors
+   * are calculated and then the store is updated with
+   * the destination entries.
+   * <p>
+   * The source entries are added to the {@link #pathsToDelete} list.
+   * @param sourcePath path of source
+   * @param sourceAttributes status of source.
+   * @param destAttributes destination attributes
+   * @param destPath destination path.
+   * @param blockSize block size.
+   * @param addAncestors should ancestors be added?
+   * @throws IOException failure
+   */
+  @Override
+  public void fileCopied(
+      final Path sourcePath,
+      final S3ObjectAttributes sourceAttributes,
+      final S3ObjectAttributes destAttributes,
+      final Path destPath,
+      final long blockSize,
+      final boolean addAncestors) throws IOException {
+
+    // build the list of entries to add in a synchronized block.
+    final List<PathMetadata> entriesToAdd = new ArrayList<>(1);
+    LOG.debug("Updating store with copied file {}", sourcePath);
+    MetadataStore store = getMetadataStore();
+    synchronized (this) {
+      checkArgument(!pathsToDelete.contains(sourcePath),
+          "File being renamed is already processed %s", destPath);
+      // create the file metadata and update the local structures.
+      S3Guard.addMoveFile(

Review comment:
   Hopefully by the time I'm done reviewing I'll understand the architecture here enough to answer this myself, but we've previously talked about an alternate rename strategy where we create everything and then delete everything, so there's always one complete copy of the data. How well does this change fit in with that idea? In the event that there's a failure, we delete the incomplete copy on retry. If that's the source, we're now done. If that's the destination, we now start over. Nice and recoverable, and until the rename succeeds you haven't disturbed anything.
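A rough sketch of that copy-all-then-delete-all strategy, for reference. Everything below is hypothetical and invented purely for illustration; the `ObjectOps` interface and its methods are stand-ins, not APIs from this PR or from hadoop-aws:

```java
// Illustrative sketch only: ObjectOps and all names here are hypothetical,
// not part of this PR or of hadoop-aws.
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class CopyThenDeleteRenameSketch {

  /** Hypothetical object-store operations. */
  interface ObjectOps {
    List<String> list(String prefix) throws IOException;
    void copy(String srcKey, String destKey) throws IOException;
    void deleteAll(List<String> keys) throws IOException;
  }

  private final ObjectOps ops;

  CopyThenDeleteRenameSketch(ObjectOps ops) {
    this.ops = ops;
  }

  /**
   * Rename by copying every object before deleting any source object,
   * so a complete copy of the data exists at every point in time.
   */
  void rename(String srcPrefix, String destPrefix) throws IOException {
    List<String> sources = ops.list(srcPrefix);
    List<String> copied = new ArrayList<>();
    try {
      // Phase 1: copy everything. The source stays complete throughout.
      for (String key : sources) {
        String destKey = destPrefix + key.substring(srcPrefix.length());
        ops.copy(key, destKey);
        copied.add(destKey);
      }
    } catch (IOException e) {
      // The destination is the incomplete copy: delete it and rethrow,
      // so a retry starts over against an undisturbed source.
      ops.deleteAll(copied);
      throw e;
    }
    // Phase 2: delete the sources. If this fails partway through, the
    // destination is already complete; a retry only has to finish the
    // source delete.
    ops.deleteAll(sources);
  }
}
```

The invariant is that the source delete only starts after every copy has succeeded, so at least one complete copy of the data exists at all times: a failure in the copy phase leaves the source untouched, while a failure in the delete phase leaves the destination complete.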

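Separately, the class javadoc quoted above describes building the new metastore entries inside a synchronized block while performing the actual store update outside it. A minimal sketch of that collect-under-lock, write-outside-lock pattern, again with hypothetical names (`MetaStore`, `pathsSeen`) rather than anything from the PR:

```java
// Illustrative sketch only: MetaStore and every name here are hypothetical
// stand-ins, not the PR's RenameTracker API.
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class LockScopeSketch {

  /** Hypothetical metastore with a potentially slow bulk update. */
  interface MetaStore {
    void putAll(List<String> entries);
  }

  private final MetaStore store;
  private final Set<String> pathsSeen = new HashSet<>();

  LockScopeSketch(MetaStore store) {
    this.store = store;
  }

  void fileCopied(String sourcePath, String destPath) {
    final List<String> entriesToAdd = new ArrayList<>(1);
    // Hold the lock only while checking and mutating shared local state.
    synchronized (this) {
      if (!pathsSeen.add(sourcePath)) {
        throw new IllegalArgumentException(
            "File being renamed is already processed " + sourcePath);
      }
      entriesToAdd.add(destPath);
    }
    // The (slow) store update runs outside the lock, so parallel copy
    // callbacks are not serialized behind a metastore round trip.
    store.putAll(entriesToAdd);
  }
}
```

Keeping the network call outside the lock means parallel fileCopied callbacks only contend on the cheap local bookkeeping, not on metastore round trips.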