http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java new file mode 100644 index 0000000..8515bfb --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/PathMetadataDynamoDBTranslation.java @@ -0,0 +1,304 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import java.io.IOException; +import java.net.URI; +import java.util.Arrays; +import java.util.Collection; + +import com.amazonaws.services.dynamodbv2.document.Item; +import com.amazonaws.services.dynamodbv2.document.KeyAttribute; +import com.amazonaws.services.dynamodbv2.document.PrimaryKey; +import com.amazonaws.services.dynamodbv2.model.AttributeDefinition; +import com.amazonaws.services.dynamodbv2.model.KeySchemaElement; +import com.amazonaws.services.dynamodbv2.model.KeyType; +import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + +import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.Tristate; + +/** + * Defines methods for translating between domain model objects and their + * representations in the DynamoDB schema. + */ [email protected] [email protected] +final class PathMetadataDynamoDBTranslation { + + /** The HASH key name of each item. */ + @VisibleForTesting + static final String PARENT = "parent"; + /** The RANGE key name of each item. */ + @VisibleForTesting + static final String CHILD = "child"; + @VisibleForTesting + static final String IS_DIR = "is_dir"; + @VisibleForTesting + static final String MOD_TIME = "mod_time"; + @VisibleForTesting + static final String FILE_LENGTH = "file_length"; + @VisibleForTesting + static final String BLOCK_SIZE = "block_size"; + static final String IS_DELETED = "is_deleted"; + + /** Table version field {@value} in version marker item. */ + @VisibleForTesting + static final String TABLE_VERSION = "table_version"; + + /** Table creation timestampfield {@value} in version marker item. 
*/ + @VisibleForTesting + static final String TABLE_CREATED = "table_created"; + + /** The version marker field is invalid. */ + static final String E_NOT_VERSION_MARKER = "Not a version marker: "; + + /** + * Returns the key schema for the DynamoDB table. + * + * @return DynamoDB key schema + */ + static Collection<KeySchemaElement> keySchema() { + return Arrays.asList( + new KeySchemaElement(PARENT, KeyType.HASH), + new KeySchemaElement(CHILD, KeyType.RANGE)); + } + + /** + * Returns the attribute definitions for the DynamoDB table. + * + * @return DynamoDB attribute definitions + */ + static Collection<AttributeDefinition> attributeDefinitions() { + return Arrays.asList( + new AttributeDefinition(PARENT, ScalarAttributeType.S), + new AttributeDefinition(CHILD, ScalarAttributeType.S)); + } + + /** + * Converts a DynamoDB item to a {@link PathMetadata}. + * + * @param item DynamoDB item to convert + * @return {@code item} converted to a {@link PathMetadata} + */ + static PathMetadata itemToPathMetadata(Item item, String username) + throws IOException { + if (item == null) { + return null; + } + + String parentStr = item.getString(PARENT); + Preconditions.checkNotNull(parentStr, "No parent entry in item %s", item); + String childStr = item.getString(CHILD); + Preconditions.checkNotNull(childStr, "No child entry in item %s", item); + + // Skip table version markers, which are only non-absolute paths stored. + Path rawPath = new Path(parentStr, childStr); + if (!rawPath.isAbsoluteAndSchemeAuthorityNull()) { + return null; + } + + Path parent = new Path(Constants.FS_S3A + ":/" + parentStr + "/"); + Path path = new Path(parent, childStr); + + boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR); + final FileStatus fileStatus; + if (isDir) { + fileStatus = DynamoDBMetadataStore.makeDirStatus(path, username); + } else { + long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0; + long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0; + long block = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0; + fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null, + username, username, path); + } + boolean isDeleted = + item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED); + + return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted); + } + + /** + * Converts a {@link PathMetadata} to a DynamoDB item. + * + * @param meta {@link PathMetadata} to convert + * @return {@code meta} converted to DynamoDB item + */ + static Item pathMetadataToItem(PathMetadata meta) { + Preconditions.checkNotNull(meta); + final FileStatus status = meta.getFileStatus(); + final Item item = new Item().withPrimaryKey(pathToKey(status.getPath())); + if (status.isDirectory()) { + item.withBoolean(IS_DIR, true); + } else { + item.withLong(FILE_LENGTH, status.getLen()) + .withLong(MOD_TIME, status.getModificationTime()) + .withLong(BLOCK_SIZE, status.getBlockSize()); + } + item.withBoolean(IS_DELETED, meta.isDeleted()); + return item; + } + + /** + * The version marker has a primary key whose PARENT is {@code name}; + * this MUST NOT be a value which represents an absolute path. + * @param name name of the version marker + * @param version version number + * @param timestamp creation timestamp + * @return an item representing a version marker. 
+ */ + static Item createVersionMarker(String name, int version, long timestamp) { + return new Item().withPrimaryKey(createVersionMarkerPrimaryKey(name)) + .withInt(TABLE_VERSION, version) + .withLong(TABLE_CREATED, timestamp); + } + + /** + * Create the primary key of the version marker. + * @param name key name + * @return the key to use when registering or resolving version markers + */ + static PrimaryKey createVersionMarkerPrimaryKey(String name) { + return new PrimaryKey(PARENT, name, CHILD, name); + } + + /** + * Extract the version from a version marker item. + * @param marker version marker item + * @return the extracted version field + * @throws IOException if the item is not a version marker + */ + static int extractVersionFromMarker(Item marker) throws IOException { + if (marker.hasAttribute(TABLE_VERSION)) { + return marker.getInt(TABLE_VERSION); + } else { + throw new IOException(E_NOT_VERSION_MARKER + marker); + } + } + + /** + * Extract the creation time, if present. + * @param marker version marker item + * @return the creation time, or null + * @throws IOException if the item is not a version marker + */ + static Long extractCreationTimeFromMarker(Item marker) throws IOException { + if (marker.hasAttribute(TABLE_CREATED)) { + return marker.getLong(TABLE_CREATED); + } else { + return null; + } + } + + /** + * Converts a collection {@link PathMetadata} to a collection DynamoDB items. + * + * @see #pathMetadataToItem(PathMetadata) + */ + static Item[] pathMetadataToItem(Collection<PathMetadata> metas) { + if (metas == null) { + return null; + } + + final Item[] items = new Item[metas.size()]; + int i = 0; + for (PathMetadata meta : metas) { + items[i++] = pathMetadataToItem(meta); + } + return items; + } + + /** + * Converts a {@link Path} to a DynamoDB equality condition on that path as + * parent, suitable for querying all direct children of the path. + * + * @param path the path; can not be null + * @return DynamoDB equality condition on {@code path} as parent + */ + static KeyAttribute pathToParentKeyAttribute(Path path) { + return new KeyAttribute(PARENT, pathToParentKey(path)); + } + + /** + * e.g. {@code pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a} + * @param path path to convert + * @return string for parent key + */ + static String pathToParentKey(Path path) { + Preconditions.checkNotNull(path); + Preconditions.checkArgument(path.isUriPathAbsolute(), "Path not absolute"); + URI uri = path.toUri(); + String bucket = uri.getHost(); + Preconditions.checkArgument(!StringUtils.isEmpty(bucket), + "Path missing bucket"); + String pKey = "/" + bucket + uri.getPath(); + + // Strip trailing slash + if (pKey.endsWith("/")) { + pKey = pKey.substring(0, pKey.length() - 1); + } + return pKey; + } + + /** + * Converts a {@link Path} to a DynamoDB key, suitable for getting the item + * matching the path. + * + * @param path the path; can not be null + * @return DynamoDB key for item matching {@code path} + */ + static PrimaryKey pathToKey(Path path) { + Preconditions.checkArgument(!path.isRoot(), + "Root path is not mapped to any PrimaryKey"); + return new PrimaryKey(PARENT, pathToParentKey(path.getParent()), CHILD, + path.getName()); + } + + /** + * Converts a collection of {@link Path} to a collection of DynamoDB keys. 
+ * + * @see #pathToKey(Path) + */ + static PrimaryKey[] pathToKey(Collection<Path> paths) { + if (paths == null) { + return null; + } + + final PrimaryKey[] keys = new PrimaryKey[paths.size()]; + int i = 0; + for (Path p : paths) { + keys[i++] = pathToKey(p); + } + return keys; + } + + /** + * There is no need to instantiate this class. + */ + private PathMetadataDynamoDBTranslation() { + } + +}
http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java new file mode 100644 index 0000000..7e4aec1 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3Guard.java @@ -0,0 +1,463 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Set; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.S3AInstrumentation; +import org.apache.hadoop.fs.s3a.Tristate; +import org.apache.hadoop.util.ReflectionUtils; + +import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL; +import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY; +import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST; +import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus; + +/** + * Logic for integrating MetadataStore with S3A. + */ [email protected] [email protected] +public final class S3Guard { + private static final Logger LOG = LoggerFactory.getLogger(S3Guard.class); + + @InterfaceAudience.Private + @InterfaceStability.Unstable + @VisibleForTesting + public static final String S3GUARD_DDB_CLIENT_FACTORY_IMPL = + "fs.s3a.s3guard.ddb.client.factory.impl"; + + static final Class<? extends DynamoDBClientFactory> + S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT = + DynamoDBClientFactory.DefaultDynamoDBClientFactory.class; + private static final FileStatus[] EMPTY_LISTING = new FileStatus[0]; + + // Utility class. All static functions. + private S3Guard() { } + + /* Utility functions. */ + + /** + * Create a new instance of the configured MetadataStore. + * The returned MetadataStore will have been initialized via + * {@link MetadataStore#initialize(FileSystem)} by this function before + * returning it. 
Callers must clean up by calling + * {@link MetadataStore#close()} when done using the MetadataStore. + * + * @param fs FileSystem whose Configuration specifies which + * implementation to use. + * @return Reference to new MetadataStore. + * @throws IOException if the metadata store cannot be instantiated + */ + public static MetadataStore getMetadataStore(FileSystem fs) + throws IOException { + Preconditions.checkNotNull(fs); + Configuration conf = fs.getConf(); + Preconditions.checkNotNull(conf); + MetadataStore msInstance; + try { + Class<? extends MetadataStore> msClass = getMetadataStoreClass(conf); + msInstance = ReflectionUtils.newInstance(msClass, conf); + LOG.debug("Using {} metadata store for {} filesystem", + msClass.getSimpleName(), fs.getScheme()); + msInstance.initialize(fs); + return msInstance; + } catch (RuntimeException | IOException e) { + String message = "Failed to instantiate metadata store " + + conf.get(S3_METADATA_STORE_IMPL) + + " defined in " + S3_METADATA_STORE_IMPL + + ": " + e; + LOG.error(message, e); + if (e instanceof IOException) { + throw e; + } else { + throw new IOException(message, e); + } + } + } + + private static Class<? extends MetadataStore> getMetadataStoreClass( + Configuration conf) { + if (conf == null) { + return NullMetadataStore.class; + } + + return conf.getClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class, + MetadataStore.class); + } + + + /** + * Helper function which puts a given S3AFileStatus into the MetadataStore and + * returns the same S3AFileStatus. Instrumentation monitors the put operation. + * @param ms MetadataStore to {@code put()} into. + * @param status status to store + * @param instrumentation instrumentation of the s3a file system + * @return The same status as passed in + * @throws IOException if metadata store update failed + */ + public static S3AFileStatus putAndReturn(MetadataStore ms, + S3AFileStatus status, + S3AInstrumentation instrumentation) throws IOException { + long startTimeNano = System.nanoTime(); + ms.put(new PathMetadata(status)); + instrumentation.addValueToQuantiles(S3GUARD_METADATASTORE_PUT_PATH_LATENCY, + (System.nanoTime() - startTimeNano)); + instrumentation.incrementCounter(S3GUARD_METADATASTORE_PUT_PATH_REQUEST, 1); + return status; + } + + /** + * Convert the data of a directory listing to an array of {@link FileStatus} + * entries. Tombstones are filtered out at this point. If the listing is null + * an empty array is returned. + * @param dirMeta directory listing -may be null + * @return a possibly-empty array of file status entries + */ + public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) { + if (dirMeta == null) { + return EMPTY_LISTING; + } + + Collection<PathMetadata> listing = dirMeta.getListing(); + List<FileStatus> statuses = new ArrayList<>(); + + for (PathMetadata pm : listing) { + if (!pm.isDeleted()) { + statuses.add(pm.getFileStatus()); + } + } + + return statuses.toArray(new FileStatus[0]); + } + + /** + * Given directory listing metadata from both the backing store and the + * MetadataStore, merge the two sources of truth to create a consistent + * view of the current directory contents, which can be returned to clients. + * + * Also update the MetadataStore to reflect the resulting directory listing. + * + * @param ms MetadataStore to use. + * @param path path to directory + * @param backingStatuses Directory listing from the backing store. + * @param dirMeta Directory listing from MetadataStore. May be null. 
+ * @param isAuthoritative State of authoritative mode + * @return Final result of directory listing. + * @throws IOException if metadata store update failed + */ + public static FileStatus[] dirListingUnion(MetadataStore ms, Path path, + List<FileStatus> backingStatuses, DirListingMetadata dirMeta, + boolean isAuthoritative) throws IOException { + + // Fast-path for NullMetadataStore + if (isNullMetadataStore(ms)) { + return backingStatuses.toArray(new FileStatus[backingStatuses.size()]); + } + + assertQualified(path); + + if (dirMeta == null) { + // The metadataStore had zero state for this directory + dirMeta = new DirListingMetadata(path, DirListingMetadata.EMPTY_DIR, + false); + } + + Set<Path> deleted = dirMeta.listTombstones(); + + // Since we treat the MetadataStore as a "fresher" or "consistent" view + // of metadata, we always use its metadata first. + + // Since the authoritative case is already handled outside this function, + // we will basically start with the set of directory entries in the + // DirListingMetadata, and add any that only exist in the backingStatuses. + + boolean changed = false; + for (FileStatus s : backingStatuses) { + if (deleted.contains(s.getPath())) { + continue; + } + + // Minor race condition here. Multiple threads could add to this + // mutable DirListingMetadata. Since it is backed by a + // ConcurrentHashMap, the last put() wins. + // More concerning is two threads racing on listStatus() and delete(). + // Any FileSystem has similar race conditions, but we could persist + // a stale entry longer. We could expose an atomic + // DirListingMetadata#putIfNotPresent() + boolean updated = dirMeta.put(s); + changed = changed || updated; + } + + if (changed && isAuthoritative) { + dirMeta.setAuthoritative(true); // This is the full directory contents + ms.put(dirMeta); + } + + return dirMetaToStatuses(dirMeta); + } + + /** + * Although NullMetadataStore does nothing, callers may wish to avoid work + * (fast path) when the NullMetadataStore is in use. + * @param ms The MetadataStore to test + * @return true iff the MetadataStore is the null, or no-op, implementation. + */ + public static boolean isNullMetadataStore(MetadataStore ms) { + return (ms instanceof NullMetadataStore); + } + + /** + * Update MetadataStore to reflect creation of the given directories. + * + * If an IOException is raised while trying to update the entry, this + * operation catches the exception and returns. + * @param ms MetadataStore to update. + * @param dirs null, or an ordered list of directories from leaf to root. + * E.g. if /a/ exists, and mkdirs(/a/b/c/d) is called, this + * list will contain [/a/b/c/d, /a/b/c, /a/b]. /a/b/c/d is + * an empty, dir, and the other dirs only contain their child + * dir. + * @param owner Hadoop user name. + * @param authoritative Whether to mark new directories as authoritative. + */ + public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs, + String owner, boolean authoritative) { + if (dirs == null) { + return; + } + + /* We discussed atomicity of this implementation. + * The concern is that multiple clients could race to write different + * cached directories to the MetadataStore. Two solutions are proposed: + * 1. Move mkdirs() into MetadataStore interface and let implementations + * ensure they are atomic. + * 2. 
Specify that the semantics of MetadataStore#putListStatus() is + * always additive, That is, if MetadataStore has listStatus() state + * for /a/b that contains [/a/b/file0, /a/b/file1], and we then call + * putListStatus(/a/b -> [/a/b/file2, /a/b/file3], isAuthoritative=true), + * then we will end up with final state of + * [/a/b/file0, /a/b/file1, /a/b/file2, /a/b/file3], isAuthoritative = + * true + */ + FileStatus prevStatus = null; + + // Use new batched put to reduce round trips. + List<PathMetadata> pathMetas = new ArrayList<>(dirs.size()); + + try { + // Iterate from leaf to root + for (int i = 0; i < dirs.size(); i++) { + boolean isLeaf = (prevStatus == null); + Path f = dirs.get(i); + assertQualified(f); + FileStatus status = + createUploadFileStatus(f, true, 0, 0, owner); + + // We only need to put a DirListingMetadata if we are setting + // authoritative bit + DirListingMetadata dirMeta = null; + if (authoritative) { + Collection<PathMetadata> children; + if (isLeaf) { + children = DirListingMetadata.EMPTY_DIR; + } else { + children = new ArrayList<>(1); + children.add(new PathMetadata(prevStatus)); + } + dirMeta = new DirListingMetadata(f, children, authoritative); + ms.put(dirMeta); + } + + pathMetas.add(new PathMetadata(status)); + prevStatus = status; + } + + // Batched put + ms.put(pathMetas); + } catch (IOException ioe) { + LOG.error("MetadataStore#put() failure:", ioe); + } + } + + /** + * Helper function that records the move of directory paths, adding + * resulting metadata to the supplied lists. + * Does not store in MetadataStore. + * @param ms MetadataStore, used to make this a no-op, when it is + * NullMetadataStore. + * @param srcPaths stores the source path here + * @param dstMetas stores destination metadata here + * @param srcPath source path to store + * @param dstPath destination path to store + * @param owner file owner to use in created records + */ + public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths, + Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath, + String owner) { + if (isNullMetadataStore(ms)) { + return; + } + assertQualified(srcPath, dstPath); + + FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner); + addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus); + } + + /** + * Like {@link #addMoveDir(MetadataStore, Collection, Collection, Path, + * Path, String)} (), but for files. + * @param ms MetadataStore, used to make this a no-op, when it is + * NullMetadataStore. + * @param srcPaths stores the source path here + * @param dstMetas stores destination metadata here + * @param srcPath source path to store + * @param dstPath destination path to store + * @param size length of file moved + * @param blockSize blocksize to associate with destination file + * @param owner file owner to use in created records + */ + public static void addMoveFile(MetadataStore ms, Collection<Path> srcPaths, + Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath, + long size, long blockSize, String owner) { + if (isNullMetadataStore(ms)) { + return; + } + assertQualified(srcPath, dstPath); + FileStatus dstStatus = createUploadFileStatus(dstPath, false, + size, blockSize, owner); + addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus); + } + + /** + * Helper method that records the move of all ancestors of a path. + * + * In S3A, an optimization is to delete unnecessary fake directory objects if + * the directory is non-empty. 
In that case, for a nested child to move, S3A + * is not listing and thus moving all its ancestors (up to source root). So we + * take care of those inferred directories of this path explicitly. + * + * As {@link #addMoveFile} and {@link #addMoveDir}, this method adds resulting + * metadata to the supplied lists. It does not store in MetadataStore. + * + * @param ms MetadataStore, no-op if it is NullMetadataStore + * @param srcPaths stores the source path here + * @param dstMetas stores destination metadata here + * @param srcRoot source root up to which (exclusive) should we add ancestors + * @param srcPath source path of the child to add ancestors + * @param dstPath destination path of the child to add ancestors + * @param owner Hadoop user name + */ + public static void addMoveAncestors(MetadataStore ms, + Collection<Path> srcPaths, Collection<PathMetadata> dstMetas, + Path srcRoot, Path srcPath, Path dstPath, String owner) { + if (isNullMetadataStore(ms)) { + return; + } + + assertQualified(srcRoot, srcPath, dstPath); + + if (srcPath.equals(srcRoot)) { + LOG.debug("Skip moving ancestors of source root directory {}", srcRoot); + return; + } + + Path parentSrc = srcPath.getParent(); + Path parentDst = dstPath.getParent(); + while (parentSrc != null + && !parentSrc.isRoot() + && !parentSrc.equals(srcRoot) + && !srcPaths.contains(parentSrc)) { + LOG.debug("Renaming non-listed parent {} to {}", parentSrc, parentDst); + S3Guard.addMoveDir(ms, srcPaths, dstMetas, parentSrc, parentDst, owner); + parentSrc = parentSrc.getParent(); + parentDst = parentDst.getParent(); + } + } + + public static void addAncestors(MetadataStore metadataStore, + Path qualifiedPath, String username) throws IOException { + Collection<PathMetadata> newDirs = new ArrayList<>(); + Path parent = qualifiedPath.getParent(); + while (!parent.isRoot()) { + PathMetadata directory = metadataStore.get(parent); + if (directory == null || directory.isDeleted()) { + FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username, + null, parent); + PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false); + newDirs.add(meta); + } else { + break; + } + parent = parent.getParent(); + } + metadataStore.put(newDirs); + } + + private static void addMoveStatus(Collection<Path> srcPaths, + Collection<PathMetadata> dstMetas, + Path srcPath, + FileStatus dstStatus) { + srcPaths.add(srcPath); + dstMetas.add(new PathMetadata(dstStatus)); + } + + /** + * Assert that the path is qualified with a host and scheme. + * @param p path to check + * @throws NullPointerException if either argument does not hold + */ + public static void assertQualified(Path p) { + URI uri = p.toUri(); + // Paths must include bucket in case MetadataStore is shared between + // multiple S3AFileSystem instances + Preconditions.checkNotNull(uri.getHost(), "Null host in " + uri); + + // This should never fail, but is retained for completeness. + Preconditions.checkNotNull(uri.getScheme(), "Null scheme in " + uri); + } + + /** + * Assert that all paths are valid. 
+ * @param paths path to check + * @throws NullPointerException if either argument does not hold + */ + public static void assertQualified(Path...paths) { + for (Path path : paths) { + assertQualified(path); + } + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java new file mode 100644 index 0000000..be271ae --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -0,0 +1,924 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.s3guard; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.PrintStream; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileStatus; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.shell.CommandFormat; +import org.apache.hadoop.util.GenericOptionsParser; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import static org.apache.hadoop.fs.s3a.Constants.*; + +/** + * CLI to manage S3Guard Metadata Store. 
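+ * <p>Usage (summarizing the {@code USAGE} string below):
+ * {@code hadoop s3guard [command] [OPTIONS] [s3a://BUCKET]}, where the command is one of
+ * {@code init}, {@code destroy}, {@code import}, {@code diff} or {@code prune}.</p>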
+ */ +public abstract class S3GuardTool extends Configured implements Tool { + private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class); + + private static final String NAME = "s3guard"; + private static final String COMMON_USAGE = + "When possible and not overridden by more specific options, metadata\n" + + "repository information will be inferred from the S3A URL (if provided)" + + "\n\n" + + "Generic options supported are:\n" + + " -conf <config file> - specify an application configuration file\n" + + " -D <property=value> - define a value for a given property\n"; + + private static final String USAGE = NAME + + " [command] [OPTIONS] [s3a://BUCKET]\n\n" + + "Commands: \n" + + "\t" + Init.NAME + " - " + Init.PURPOSE + "\n" + + "\t" + Destroy.NAME + " - " + Destroy.PURPOSE + "\n" + + "\t" + Import.NAME + " - " + Import.PURPOSE + "\n" + + "\t" + Diff.NAME + " - " + Diff.PURPOSE + "\n" + + "\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n"; + private static final String DATA_IN_S3_IS_PRESERVED + = "(all data in S3 is preserved"; + + abstract public String getUsage(); + + // Exit codes + static final int SUCCESS = 0; + static final int INVALID_ARGUMENT = 1; + static final int ERROR = 99; + + private S3AFileSystem filesystem; + private MetadataStore store; + private final CommandFormat commandFormat; + + private static final String META_FLAG = "meta"; + private static final String DAYS_FLAG = "days"; + private static final String HOURS_FLAG = "hours"; + private static final String MINUTES_FLAG = "minutes"; + private static final String SECONDS_FLAG = "seconds"; + + private static final String REGION_FLAG = "region"; + private static final String READ_FLAG = "read"; + private static final String WRITE_FLAG = "write"; + + /** + * Constructor a S3Guard tool with HDFS configuration. + * @param conf Configuration. + */ + protected S3GuardTool(Configuration conf) { + super(conf); + + commandFormat = new CommandFormat(0, Integer.MAX_VALUE); + // For metadata store URI + commandFormat.addOptionWithValue(META_FLAG); + // DDB region. + commandFormat.addOptionWithValue(REGION_FLAG); + } + + /** + * Return sub-command name. + */ + abstract String getName(); + + /** + * Parse DynamoDB region from either -m option or a S3 path. + * + * This function should only be called from {@link Init} or + * {@link Destroy}. + * + * @param paths remaining parameters from CLI. + * @return false for invalid parameters. + * @throws IOException on I/O errors. + */ + boolean parseDynamoDBRegion(List<String> paths) throws IOException { + Configuration conf = getConf(); + String fromCli = getCommandFormat().getOptValue(REGION_FLAG); + String fromConf = conf.get(S3GUARD_DDB_REGION_KEY); + boolean hasS3Path = !paths.isEmpty(); + + if (fromCli != null) { + if (fromCli.isEmpty()) { + System.err.println("No region provided with -" + REGION_FLAG + " flag"); + return false; + } + if (hasS3Path) { + System.err.println("Providing both an S3 path and the -" + REGION_FLAG + + " flag is not supported. 
If you need to specify a different " + + "region than the S3 bucket, configure " + S3GUARD_DDB_REGION_KEY); + return false; + } + conf.set(S3GUARD_DDB_REGION_KEY, fromCli); + return true; + } + + if (fromConf != null) { + if (fromConf.isEmpty()) { + System.err.printf("No region provided with config %s, %n", + S3GUARD_DDB_REGION_KEY); + return false; + } + return true; + } + + if (hasS3Path) { + String s3Path = paths.get(0); + initS3AFileSystem(s3Path); + return true; + } + + System.err.println("No region found from -" + REGION_FLAG + " flag, " + + "config, or S3 bucket"); + return false; + } + + /** + * Parse metadata store from command line option or HDFS configuration. + * + * @param forceCreate override the auto-creation setting to true. + * @return a initialized metadata store. + */ + MetadataStore initMetadataStore(boolean forceCreate) throws IOException { + if (getStore() != null) { + return getStore(); + } + Configuration conf; + if (filesystem == null) { + conf = getConf(); + } else { + conf = filesystem.getConf(); + } + String metaURI = getCommandFormat().getOptValue(META_FLAG); + if (metaURI != null && !metaURI.isEmpty()) { + URI uri = URI.create(metaURI); + LOG.info("create metadata store: {}", uri + " scheme: " + + uri.getScheme()); + switch (uri.getScheme().toLowerCase(Locale.ENGLISH)) { + case "local": + setStore(new LocalMetadataStore()); + break; + case "dynamodb": + setStore(new DynamoDBMetadataStore()); + conf.set(S3GUARD_DDB_TABLE_NAME_KEY, uri.getAuthority()); + if (forceCreate) { + conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true); + } + break; + default: + throw new IOException( + String.format("Metadata store %s is not supported", uri)); + } + } else { + // CLI does not specify metadata store URI, it uses default metadata store + // DynamoDB instead. + setStore(new DynamoDBMetadataStore()); + if (forceCreate) { + conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true); + } + } + + if (filesystem == null) { + getStore().initialize(conf); + } else { + getStore().initialize(filesystem); + } + LOG.info("Metadata store {} is initialized.", getStore()); + return getStore(); + } + + /** + * Initialize S3A FileSystem instance. + * + * @param path s3a URI + * @throws IOException + */ + void initS3AFileSystem(String path) throws IOException { + URI uri; + try { + uri = new URI(path); + } catch (URISyntaxException e) { + throw new IOException(e); + } + // Make sure that S3AFileSystem does not hold an actual MetadataStore + // implementation. + Configuration conf = getConf(); + conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class, + MetadataStore.class); + FileSystem fs = FileSystem.get(uri, getConf()); + if (!(fs instanceof S3AFileSystem)) { + throw new IOException( + String.format("URI %s is not a S3A file system: %s", uri, + fs.getClass().getName())); + } + filesystem = (S3AFileSystem) fs; + } + + /** + * Parse CLI arguments and returns the position arguments. + * The options are stored in {@link #commandFormat} + * + * @param args command line arguments. + * @return the position arguments from CLI. 
+ */ + List<String> parseArgs(String[] args) { + return getCommandFormat().parse(args, 1); + } + + protected S3AFileSystem getFilesystem() { + return filesystem; + } + + protected void setFilesystem(S3AFileSystem filesystem) { + this.filesystem = filesystem; + } + + @VisibleForTesting + public MetadataStore getStore() { + return store; + } + + @VisibleForTesting + protected void setStore(MetadataStore store) { + Preconditions.checkNotNull(store); + this.store = store; + } + + protected CommandFormat getCommandFormat() { + return commandFormat; + } + + /** + * Create the metadata store. + */ + static class Init extends S3GuardTool { + private static final String NAME = "init"; + public static final String PURPOSE = "initialize metadata repository"; + private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" + + "\t" + PURPOSE + "\n\n" + + "Common options:\n" + + " -" + META_FLAG + " URL - Metadata repository details " + + "(implementation-specific)\n" + + "\n" + + "Amazon DynamoDB-specific options:\n" + + " -" + REGION_FLAG + " REGION - Service region for connections\n" + + " -" + READ_FLAG + " UNIT - Provisioned read throughput units\n" + + " -" + WRITE_FLAG + " UNIT - Provisioned write through put units\n" + + "\n" + + " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" + + " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" + + " is not supported."; + + Init(Configuration conf) { + super(conf); + // read capacity. + getCommandFormat().addOptionWithValue(READ_FLAG); + // write capacity. + getCommandFormat().addOptionWithValue(WRITE_FLAG); + } + + @Override + String getName() { + return NAME; + } + + @Override + public String getUsage() { + return USAGE; + } + + @Override + public int run(String[] args) throws IOException { + List<String> paths = parseArgs(args); + + String readCap = getCommandFormat().getOptValue(READ_FLAG); + if (readCap != null && !readCap.isEmpty()) { + int readCapacity = Integer.parseInt(readCap); + getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, readCapacity); + } + String writeCap = getCommandFormat().getOptValue(WRITE_FLAG); + if (writeCap != null && !writeCap.isEmpty()) { + int writeCapacity = Integer.parseInt(writeCap); + getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity); + } + + // Validate parameters. + if (!parseDynamoDBRegion(paths)) { + System.err.println(USAGE); + return INVALID_ARGUMENT; + } + initMetadataStore(true); + return SUCCESS; + } + } + + /** + * Destroy a metadata store. 
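+ * Only the table or database backing the Metadata Store is removed; as the
+ * {@code PURPOSE} text notes, all data in S3 itself is preserved.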
+ */ + static class Destroy extends S3GuardTool { + private static final String NAME = "destroy"; + public static final String PURPOSE = "destroy Metadata Store data " + + DATA_IN_S3_IS_PRESERVED; + private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" + + "\t" + PURPOSE + "\n\n" + + "Common options:\n" + + " -" + META_FLAG + " URL - Metadata repository details " + + "(implementation-specific)\n" + + "\n" + + "Amazon DynamoDB-specific options:\n" + + " -" + REGION_FLAG + " REGION - Service region for connections\n" + + "\n" + + " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" + + " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" + + " is not supported."; + + Destroy(Configuration conf) { + super(conf); + } + + @Override + String getName() { + return NAME; + } + + @Override + public String getUsage() { + return USAGE; + } + + public int run(String[] args) throws IOException { + List<String> paths = parseArgs(args); + if (!parseDynamoDBRegion(paths)) { + System.err.println(USAGE); + return INVALID_ARGUMENT; + } + + try { + initMetadataStore(false); + } catch (FileNotFoundException e) { + // indication that the table was not found + LOG.debug("Failed to bind to store to be destroyed", e); + LOG.info("Metadata Store does not exist."); + return SUCCESS; + } + + Preconditions.checkState(getStore() != null, + "Metadata Store is not initialized"); + + getStore().destroy(); + LOG.info("Metadata store is deleted."); + return SUCCESS; + } + } + + /** + * Import s3 metadata to the metadata store. + */ + static class Import extends S3GuardTool { + private static final String NAME = "import"; + public static final String PURPOSE = "import metadata from existing S3 " + + "data"; + private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" + + "\t" + PURPOSE + "\n\n" + + "Common options:\n" + + " -" + META_FLAG + " URL - Metadata repository details " + + "(implementation-specific)\n" + + "\n" + + "Amazon DynamoDB-specific options:\n" + + " -" + REGION_FLAG + " REGION - Service region for connections\n" + + "\n" + + " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" + + " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" + + " is not supported."; + + private final Set<Path> dirCache = new HashSet<>(); + + Import(Configuration conf) { + super(conf); + } + + @Override + String getName() { + return NAME; + } + + @Override + public String getUsage() { + return USAGE; + } + + /** + * Put parents into MS and cache if the parents are not presented. + * + * @param f the file or an empty directory. + * @throws IOException on I/O errors. + */ + private void putParentsIfNotPresent(FileStatus f) throws IOException { + Preconditions.checkNotNull(f); + Path parent = f.getPath().getParent(); + while (parent != null) { + if (dirCache.contains(parent)) { + return; + } + FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent, + f.getOwner()); + getStore().put(new PathMetadata(dir)); + dirCache.add(parent); + parent = parent.getParent(); + } + } + + /** + * Recursively import every path under path. + * @return number of items inserted into MetadataStore + * @throws IOException on I/O errors. 
+ */ + private long importDir(FileStatus status) throws IOException { + Preconditions.checkArgument(status.isDirectory()); + RemoteIterator<LocatedFileStatus> it = getFilesystem() + .listFilesAndEmptyDirectories(status.getPath(), true); + long items = 0; + + while (it.hasNext()) { + LocatedFileStatus located = it.next(); + FileStatus child; + if (located.isDirectory()) { + child = DynamoDBMetadataStore.makeDirStatus(located.getPath(), + located.getOwner()); + dirCache.add(child.getPath()); + } else { + child = new S3AFileStatus(located.getLen(), + located.getModificationTime(), + located.getPath(), + located.getBlockSize(), + located.getOwner()); + } + putParentsIfNotPresent(child); + getStore().put(new PathMetadata(child)); + items++; + } + return items; + } + + @Override + public int run(String[] args) throws IOException { + List<String> paths = parseArgs(args); + if (paths.isEmpty()) { + System.err.println(getUsage()); + return INVALID_ARGUMENT; + } + String s3Path = paths.get(0); + initS3AFileSystem(s3Path); + + URI uri; + try { + uri = new URI(s3Path); + } catch (URISyntaxException e) { + throw new IOException(e); + } + String filePath = uri.getPath(); + if (filePath.isEmpty()) { + // If they specify a naked S3 URI (e.g. s3a://bucket), we'll consider + // root to be the path + filePath = "/"; + } + Path path = new Path(filePath); + FileStatus status = getFilesystem().getFileStatus(path); + + initMetadataStore(false); + + long items = 1; + if (status.isFile()) { + PathMetadata meta = new PathMetadata(status); + getStore().put(meta); + } else { + items = importDir(status); + } + + System.out.printf("Inserted %d items into Metadata Store%n", items); + + return SUCCESS; + } + } + + /** + * Show diffs between the s3 and metadata store. + */ + static class Diff extends S3GuardTool { + private static final String NAME = "diff"; + public static final String PURPOSE = "report on delta between S3 and " + + "repository"; + private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" + + "\t" + PURPOSE + "\n\n" + + "Common options:\n" + + " -" + META_FLAG + " URL - Metadata repository details " + + "(implementation-specific)\n" + + "\n" + + "Amazon DynamoDB-specific options:\n" + + " -" + REGION_FLAG + " REGION - Service region for connections\n" + + "\n" + + " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" + + " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" + + " is not supported."; + + private static final String SEP = "\t"; + static final String S3_PREFIX = "S3"; + static final String MS_PREFIX = "MS"; + + Diff(Configuration conf) { + super(conf); + } + + @Override + String getName() { + return NAME; + } + + @Override + public String getUsage() { + return USAGE; + } + + /** + * Formats the output of printing a FileStatus in S3guard diff tool. + * @param status the status to print. + * @return the string of output. + */ + private static String formatFileStatus(FileStatus status) { + return String.format("%s%s%d%s%s", + status.isDirectory() ? "D" : "F", + SEP, + status.getLen(), + SEP, + status.getPath().toString()); + } + + /** + * Compares metadata from 2 S3 FileStatus's to see if they differ. 
+ * @param thisOne + * @param thatOne + * @return true if the metadata is not identical + */ + private static boolean differ(FileStatus thisOne, FileStatus thatOne) { + Preconditions.checkArgument(!(thisOne == null && thatOne == null)); + return (thisOne == null || thatOne == null) || + (thisOne.getLen() != thatOne.getLen()) || + (thisOne.isDirectory() != thatOne.isDirectory()) || + (!thisOne.isDirectory() && + thisOne.getModificationTime() != thatOne.getModificationTime()); + } + + /** + * Print difference, if any, between two file statuses to the output stream. + * + * @param msStatus file status from metadata store. + * @param s3Status file status from S3. + * @param out output stream. + */ + private static void printDiff(FileStatus msStatus, + FileStatus s3Status, + PrintStream out) { + Preconditions.checkArgument(!(msStatus == null && s3Status == null)); + if (msStatus != null && s3Status != null) { + Preconditions.checkArgument( + msStatus.getPath().equals(s3Status.getPath()), + String.format("The path from metadata store and s3 are different:" + + " ms=%s s3=%s", msStatus.getPath(), s3Status.getPath())); + } + + if (differ(msStatus, s3Status)) { + if (s3Status != null) { + out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(s3Status)); + } + if (msStatus != null) { + out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(msStatus)); + } + } + } + + /** + * Compare the metadata of the directory with the same path, on S3 and + * the metadata store, respectively. If one of them is null, consider the + * metadata of the directory and all its subdirectories are missing from + * the source. + * + * Pass the FileStatus obtained from s3 and metadata store to avoid one + * round trip to fetch the same metadata twice, because the FileStatus + * hve already been obtained from listStatus() / listChildren operations. + * + * @param msDir the directory FileStatus obtained from the metadata store. + * @param s3Dir the directory FileStatus obtained from S3. + * @param out the output stream to generate diff results. + * @throws IOException on I/O errors. 
+ */ + private void compareDir(FileStatus msDir, FileStatus s3Dir, + PrintStream out) throws IOException { + Preconditions.checkArgument(!(msDir == null && s3Dir == null)); + if (msDir != null && s3Dir != null) { + Preconditions.checkArgument(msDir.getPath().equals(s3Dir.getPath()), + String.format("The path from metadata store and s3 are different:" + + " ms=%s s3=%s", msDir.getPath(), s3Dir.getPath())); + } + + Map<Path, FileStatus> s3Children = new HashMap<>(); + if (s3Dir != null && s3Dir.isDirectory()) { + for (FileStatus status : getFilesystem().listStatus(s3Dir.getPath())) { + s3Children.put(status.getPath(), status); + } + } + + Map<Path, FileStatus> msChildren = new HashMap<>(); + if (msDir != null && msDir.isDirectory()) { + DirListingMetadata dirMeta = + getStore().listChildren(msDir.getPath()); + + if (dirMeta != null) { + for (PathMetadata meta : dirMeta.getListing()) { + FileStatus status = meta.getFileStatus(); + msChildren.put(status.getPath(), status); + } + } + } + + Set<Path> allPaths = new HashSet<>(s3Children.keySet()); + allPaths.addAll(msChildren.keySet()); + + for (Path path : allPaths) { + FileStatus s3Status = s3Children.get(path); + FileStatus msStatus = msChildren.get(path); + printDiff(msStatus, s3Status, out); + if ((s3Status != null && s3Status.isDirectory()) || + (msStatus != null && msStatus.isDirectory())) { + compareDir(msStatus, s3Status, out); + } + } + out.flush(); + } + + /** + * Compare both metadata store and S3 on the same path. + * + * @param path the path to be compared. + * @param out the output stream to display results. + * @throws IOException on I/O errors. + */ + private void compareRoot(Path path, PrintStream out) throws IOException { + Path qualified = getFilesystem().qualify(path); + FileStatus s3Status = null; + try { + s3Status = getFilesystem().getFileStatus(qualified); + } catch (FileNotFoundException e) { + } + PathMetadata meta = getStore().get(qualified); + FileStatus msStatus = (meta != null && !meta.isDeleted()) ? + meta.getFileStatus() : null; + compareDir(msStatus, s3Status, out); + } + + @VisibleForTesting + public int run(String[] args, PrintStream out) throws IOException { + List<String> paths = parseArgs(args); + if (paths.isEmpty()) { + out.println(USAGE); + return INVALID_ARGUMENT; + } + String s3Path = paths.get(0); + initS3AFileSystem(s3Path); + initMetadataStore(true); + + URI uri; + try { + uri = new URI(s3Path); + } catch (URISyntaxException e) { + throw new IOException(e); + } + Path root; + if (uri.getPath().isEmpty()) { + root = new Path("/"); + } else { + root = new Path(uri.getPath()); + } + root = getFilesystem().qualify(root); + compareRoot(root, out); + out.flush(); + return SUCCESS; + } + + @Override + public int run(String[] args) throws IOException { + return run(args, System.out); + } + } + + /** + * Prune metadata that has not been modified recently. 
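+ * The retention age is the sum of the -days/-hours/-minutes/-seconds options; if none
+ * are given, {@code Constants.S3GUARD_CLI_PRUNE_AGE} from the configuration is used.
+ * Illustrative example: {@code hadoop s3guard prune -days 7 s3a://BUCKET}.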
+ */ + static class Prune extends S3GuardTool { + private static final String NAME = "prune"; + public static final String PURPOSE = "truncate older metadata from " + + "repository " + + DATA_IN_S3_IS_PRESERVED;; + private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" + + "\t" + PURPOSE + "\n\n" + + "Common options:\n" + + " -" + META_FLAG + " URL - Metadata repository details " + + "(implementation-specific)\n" + + "\n" + + "Amazon DynamoDB-specific options:\n" + + " -" + REGION_FLAG + " REGION - Service region for connections\n" + + "\n" + + " URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" + + " Specifying both the -" + REGION_FLAG + " option and an S3A path\n" + + " is not supported."; + + Prune(Configuration conf) { + super(conf); + + CommandFormat format = getCommandFormat(); + format.addOptionWithValue(DAYS_FLAG); + format.addOptionWithValue(HOURS_FLAG); + format.addOptionWithValue(MINUTES_FLAG); + format.addOptionWithValue(SECONDS_FLAG); + } + + @VisibleForTesting + void setMetadataStore(MetadataStore ms) { + Preconditions.checkNotNull(ms); + this.setStore(ms); + } + + @Override + String getName() { + return NAME; + } + + @Override + public String getUsage() { + return USAGE; + } + + private long getDeltaComponent(TimeUnit unit, String arg) { + String raw = getCommandFormat().getOptValue(arg); + if (raw == null || raw.isEmpty()) { + return 0; + } + Long parsed = Long.parseLong(raw); + return unit.toMillis(parsed); + } + + @VisibleForTesting + public int run(String[] args, PrintStream out) throws + InterruptedException, IOException { + List<String> paths = parseArgs(args); + if (!parseDynamoDBRegion(paths)) { + System.err.println(USAGE); + return INVALID_ARGUMENT; + } + initMetadataStore(false); + + Configuration conf = getConf(); + long confDelta = conf.getLong(Constants.S3GUARD_CLI_PRUNE_AGE, 0); + + long cliDelta = 0; + cliDelta += getDeltaComponent(TimeUnit.DAYS, "days"); + cliDelta += getDeltaComponent(TimeUnit.HOURS, "hours"); + cliDelta += getDeltaComponent(TimeUnit.MINUTES, "minutes"); + cliDelta += getDeltaComponent(TimeUnit.SECONDS, "seconds"); + + if (confDelta <= 0 && cliDelta <= 0) { + System.err.println( + "You must specify a positive age for metadata to prune."); + } + + // A delta provided on the CLI overrides if one is configured + long delta = confDelta; + if (cliDelta > 0) { + delta = cliDelta; + } + + long now = System.currentTimeMillis(); + long divide = now - delta; + + getStore().prune(divide); + + out.flush(); + return SUCCESS; + } + + @Override + public int run(String[] args) throws InterruptedException, IOException { + return run(args, System.out); + } + } + + private static S3GuardTool command; + + private static void printHelp() { + if (command == null) { + System.err.println("Usage: hadoop " + USAGE); + System.err.println("\tperform S3Guard metadata store " + + "administrative commands."); + } else { + System.err.println("Usage: hadoop " + command.getUsage()); + } + System.err.println(); + System.err.println(COMMON_USAGE); + } + + /** + * Execute the command with the given arguments. + * + * @param args command specific arguments. + * @param conf Hadoop configuration. + * @return exit code. + * @throws Exception on I/O errors. 
+ */ + public static int run(String[] args, Configuration conf) throws + Exception { + /* ToolRunner.run does this too, but we must do it before looking at + subCommand or instantiating the cmd object below */ + String[] otherArgs = new GenericOptionsParser(conf, args) + .getRemainingArgs(); + if (otherArgs.length == 0) { + printHelp(); + return INVALID_ARGUMENT; + } + final String subCommand = otherArgs[0]; + switch (subCommand) { + case Init.NAME: + command = new Init(conf); + break; + case Destroy.NAME: + command = new Destroy(conf); + break; + case Import.NAME: + command = new Import(conf); + break; + case Diff.NAME: + command = new Diff(conf); + break; + case Prune.NAME: + command = new Prune(conf); + break; + default: + printHelp(); + return INVALID_ARGUMENT; + } + return ToolRunner.run(conf, command, otherArgs); + } + + /** + * Main entry point. Calls {@code System.exit()} on all execution paths. + * @param args argument list + */ + public static void main(String[] args) { + try { + int ret = run(args, new Configuration()); + System.exit(ret); + } catch (CommandFormat.UnknownOptionException e) { + System.err.println(e.getMessage()); + printHelp(); + System.exit(INVALID_ARGUMENT); + } catch (Throwable e) { + e.printStackTrace(System.err); + System.exit(ERROR); + } + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java new file mode 100644 index 0000000..d430315 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/package-info.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package contains classes related to S3Guard: a feature of S3A to mask + * the eventual consistency behavior of S3 and optimize access patterns by + * coordinating with a strongly consistent external store for file system + * metadata. 
+ */ [email protected] [email protected] +package org.apache.hadoop.fs.s3a.s3guard; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java index 862ce6b..ce79284 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3native/S3xLoginHelper.java @@ -105,6 +105,10 @@ public final class S3xLoginHelper { * @return a login tuple, possibly empty. */ public static Login extractLoginDetails(URI name) { + if (name == null) { + return Login.EMPTY; + } + try { String authority = name.getAuthority(); if (authority == null) { http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh b/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh new file mode 100644 index 0000000..039b077 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/shellprofile.d/hadoop-s3guard.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if ! 
declare -f hadoop_subcommand_s3guard >/dev/null 2>/dev/null; then + + if [[ "${HADOOP_SHELL_EXECNAME}" = hadoop ]]; then + hadoop_add_subcommand "s3guard" client "manage metadata on S3" + fi + + # this can't be indented otherwise shelldocs won't get it + +## @description s3guard command for hadoop +## @audience public +## @stability stable +## @replaceable yes +function hadoop_subcommand_s3guard +{ + # shellcheck disable=SC2034 + HADOOP_CLASSNAME=org.apache.hadoop.fs.s3a.s3guard.S3GuardTool + hadoop_add_to_classpath_tools hadoop-aws +} + +fi http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index 182f060..b8d37c6 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -46,6 +46,7 @@ See also: * [Testing](testing.html) * [Troubleshooting S3a](troubleshooting_s3a.html) +* [S3Guard](s3guard.html) ### Warning #1: Object Stores are not filesystems @@ -1552,7 +1553,7 @@ for `fs.s3a.server-side-encryption-algorithm` is `AES256`. SSE-KMS is where the user specifies a Customer Master Key(CMK) that is used to encrypt the objects. The user may specify a specific CMK or leave the -`fs.s3a.server-side-encryption-key` empty to use the default auto-generated key +`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key in AWS IAM. Each CMK configured in AWS IAM is region specific, and cannot be used in a in a S3 bucket in a different region. There is can also be policies assigned to the CMK that prohibit or restrict its use for users causing S3A http://git-wip-us.apache.org/repos/asf/hadoop/blob/621b43e2/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md new file mode 100644 index 0000000..fe67d69 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -0,0 +1,610 @@ +<!--- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +# S3Guard: Consistency and Metadata Caching for S3A + +**Experimental Feature** + +<!-- MACRO{toc|fromDepth=0|toDepth=5} --> + +## Overview + +*S3Guard* is an experimental feature for the S3A client of the S3 object store, +which can use a (consistent) database as the store of metadata about objects +in an S3 bucket. + +S3Guard + +1. May improve performance on directory listing/scanning operations, +including those which take place during the partitioning period of query +execution, the process where files are listed and the work divided up amongst +processes. + +1. 
Permits a consistent view of the object store. Without this, changes in
+objects may not be immediately visible, especially in listing operations.
+
+1. Offers a platform for future performance improvements for running Hadoop
+workloads on top of object stores.
+
+The basic idea is that, for each operation in the Hadoop S3 client (s3a) that
+reads or modifies metadata, a shadow copy of that metadata is stored in a
+separate MetadataStore implementation. Each MetadataStore implementation
+offers HDFS-like consistency for the metadata, and may also provide faster
+lookups for things like file status or directory listings.
+
+For links to early design documents and related patches, see
+[HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345).
+
+*Important*
+
+* S3Guard is experimental and should be considered unstable.
+
+* While all underlying data is persisted in S3, if, for some reason,
+the S3Guard-cached metadata becomes inconsistent with that in S3,
+queries on the data may become incorrect.
+For example, new datasets may be omitted, objects may be overwritten,
+or clients may not be aware that some data has been deleted.
+It is essential for all clients writing to an S3Guard-enabled
+S3 repository to use the feature. Clients reading the data may work directly
+with the S3A data, in which case the normal S3 consistency guarantees apply.
+
+
+## Setting up S3Guard
+
+The latest configuration parameters are defined in `core-default.xml`. You
+should consult that file for full information, but a summary is provided here.
+
+
+### 1. Choose the Database
+
+A core concept of S3Guard is that the directory listing data of the object
+store, *the metadata*, is replicated in a higher-performance, consistent
+database. In S3Guard, this database is called *The Metadata Store*.
+
+By default, S3Guard is not enabled.
+
+The Metadata Store to use in production is bonded to Amazon's DynamoDB
+database service. The following setting will enable this Metadata Store:
+
+```xml
+<property>
+  <name>fs.s3a.metadatastore.impl</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+</property>
+```
+
+Note that the `NullMetadataStore` can be explicitly requested if desired.
+This offers no metadata storage, and effectively disables S3Guard.
+
+```xml
+<property>
+  <name>fs.s3a.metadatastore.impl</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+</property>
+```
+
+### 2. Configure S3Guard Settings
+
+More settings may be added in the future.
+Currently the only Metadata Store-independent setting, besides the
+implementation class above, is the *allow authoritative* flag.
+
+It is recommended that you leave the default setting here:
+
+```xml
+<property>
+  <name>fs.s3a.metadatastore.authoritative</name>
+  <value>false</value>
+</property>
+```
+
+Setting this to `true` is currently an experimental feature. When true, the
+S3A client will avoid round-trips to S3 when getting directory listings, if
+there is a fully-cached version of the directory stored in the Metadata Store.
+
+Note that if this is set to true, it may exacerbate or persist existing race
+conditions around multiple concurrent modifications and listings of a given
+directory tree.
+
+In particular: **If the Metadata Store is declared as authoritative,
+all interactions with the S3 bucket(s) must be through S3A clients sharing
+the same Metadata Store.**
+
+
+### 3. Configure the Metadata Store
+
+Here are the `DynamoDBMetadataStore` settings. Other Metadata Store
+implementations will have their own configuration parameters.
+
+
+### 4. Name Your Table
+
+First, choose the name of the table you wish to use for the S3Guard metadata
+storage in your DynamoDB instance. If you leave it unset/empty, a
+separate table will be created for each S3 bucket you access, and that
+bucket's name will be used for the name of the DynamoDB table. For example,
+this sets the table name to `my-ddb-table-name`:
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table</name>
+  <value>my-ddb-table-name</value>
+  <description>
+    The DynamoDB table name to operate on. Without this property, the
+    respective S3 bucket names will be used.
+  </description>
+</property>
+```
+
+Sharing a single table across multiple buckets is usually a good idea,
+for several reasons.
+
+1. You are billed for the I/O capacity allocated to the table,
+*even when the table is not used*. Sharing capacity can reduce costs.
+
+1. You can share the "provision burden" across the buckets. That is, rather
+than allocating for the peak load on a single bucket, you can allocate for
+the peak load *across all the buckets*, which is likely to be significantly
+lower.
+
+1. It's easier to measure and tune the load requirements and cost of
+S3Guard, because there is only one table to review and configure in the
+AWS management console.
+
+When wouldn't you want to share a table?
+
+1. When you do explicitly want to provision I/O capacity to a specific bucket
+and table, isolated from others.
+
+1. When you are using separate billing for specific buckets allocated
+to specific projects.
+
+1. When different users/roles have different access rights to different buckets.
+As S3Guard requires all users to have R/W access to the table, all users will
+be able to list the metadata in all buckets, even those to which they lack
+read access.
+
+### 5. Locate Your Table
+
+You may also wish to specify the region to use for DynamoDB. If a region
+is not configured, S3A will assume that it is in the same region as the S3
+bucket. A list of regions for the DynamoDB service can be found in
+[Amazon's documentation](http://docs.aws.amazon.com/general/latest/gr/rande.html#ddb_region).
+In this example, to use the US West 2 region:
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.region</name>
+  <value>us-west-2</value>
+</property>
+```
+
+When working with S3Guard-managed buckets from EC2 VMs running in AWS
+infrastructure, using a local DynamoDB region ensures the lowest latency
+and highest reliability, as well as avoiding all long-haul network charges.
+The S3Guard tables, and indeed, the S3 buckets, should all be in the same
+region as the VMs.
+
+### 6. Optional: Create Your Table
+
+Next, you can choose whether or not the table will be automatically created
+(if it doesn't already exist). If you want this feature, set the
+`fs.s3a.s3guard.ddb.table.create` option to `true`.
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table.create</name>
+  <value>true</value>
+  <description>
+    If true, the S3A client will create the table if it does not already exist.
+  </description>
+</property>
+```
+
+### 7. If Creating a Table: Set Your DynamoDB IO Capacity
+
+Next, you need to set the DynamoDB read and write throughput requirements you
+expect to need for your cluster. Setting higher values will cost you more
+money. *Note* that these settings only affect table creation when
+`fs.s3a.s3guard.ddb.table.create` is enabled. To change the throughput for
+an existing table, use the AWS console or CLI tool.
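+
+For example, a possible AWS CLI invocation for raising the capacity of an
+existing table is sketched below; the table name and region reuse the values
+from the earlier examples, and the capacity numbers are placeholders rather
+than recommendations:
+
+```bash
+# Sketch only: update provisioned throughput on an existing S3Guard table.
+aws dynamodb update-table \
+  --table-name my-ddb-table-name \
+  --region us-west-2 \
+  --provisioned-throughput ReadCapacityUnits=100,WriteCapacityUnits=20
+```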
+
+For more details on DynamoDB capacity units, see the AWS page on [Capacity
+Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).
+
+The charges are incurred per hour for the life of the table, *even when the
+table and the underlying S3 buckets are not being used*.
+
+There are also charges incurred for data storage and for data IO outside of the
+region of the DynamoDB instance. S3Guard only stores metadata in DynamoDB: path
+names and summary details of objects; the actual data is stored in S3, so is
+billed at S3 rates.
+
+```xml
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
+  <value>500</value>
+  <description>
+    Provisioned throughput requirements for read operations in terms of capacity
+    units for the DynamoDB table. This config value will only be used when
+    creating a new DynamoDB table, though later you can manually provision by
+    increasing or decreasing read capacity as needed for existing tables.
+    See DynamoDB documents for more information.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.write</name>
+  <value>100</value>
+  <description>
+    Provisioned throughput requirements for write operations in terms of
+    capacity units for the DynamoDB table. Refer to related config
+    fs.s3a.s3guard.ddb.table.capacity.read before usage.
+  </description>
+</property>
+```
+
+Attempting to perform more IO than the capacity requested simply throttles the
+IO; small capacity numbers are recommended when initially experimenting
+with S3Guard.
+
+## Authenticating with S3Guard
+
+The DynamoDB metadata store takes advantage of the fact that the DynamoDB
+service uses the same authentication mechanisms as S3. S3Guard
+gets all its credentials from the S3A client that is using it.
+
+All existing S3 authentication mechanisms can be used, with one exception:
+for security reasons, credentials placed in URIs are not supported for
+S3Guard.
+
+## Per-bucket S3Guard configuration
+
+In production, it is likely only some buckets will have S3Guard enabled;
+those which are read-only may have it disabled, for example. Equally
+importantly, buckets in different regions should have different tables, each
+in the relevant region.
+
+These options can be managed through S3A's [per-bucket configuration
+mechanism](./index.html#Configuring_different_S3_buckets).
+All options set under `fs.s3a.bucket.BUCKETNAME.KEY` are propagated
+to the options `fs.s3a.KEY` *for that bucket only*.
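+
+As a minimal illustration of that mapping (the bucket name `nightly` below is
+purely hypothetical), the following option:
+
+```xml
+<property>
+  <!-- applies only when the bucket s3a://nightly is accessed -->
+  <name>fs.s3a.bucket.nightly.s3guard.ddb.region</name>
+  <value>eu-west-1</value>
+</property>
+```
+
+is equivalent to setting `fs.s3a.s3guard.ddb.region` to `eu-west-1`, but only
+for the bucket `s3a://nightly`.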
+
+As an example, here is a configuration to use different metadata stores
+and tables for different buckets.
+
+First, we define shortcuts for the metadata store classnames:
+
+
+```xml
+<property>
+  <name>s3guard.null</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+</property>
+
+<property>
+  <name>s3guard.dynamo</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+</property>
+```
+
+Next, Amazon's public landsat database is configured with no
+metadata store:
+
+```xml
+<property>
+  <name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
+  <value>${s3guard.null}</value>
+  <description>The read-only landsat-pds repository isn't
+  managed by S3Guard</description>
+</property>
+```
+
+Next, the `ireland-2` and `ireland-offline` buckets are configured with
+DynamoDB as the store, and a shared table `production-table`:
+
+
+```xml
+<property>
+  <name>fs.s3a.bucket.ireland-2.metadatastore.impl</name>
+  <value>${s3guard.dynamo}</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.ireland-offline.metadatastore.impl</name>
+  <value>${s3guard.dynamo}</value>
+</property>
+
+<property>
+  <name>fs.s3a.bucket.ireland-2.s3guard.ddb.table</name>
+  <value>production-table</value>
+</property>
+```
+
+The region of this table is automatically set to be that of the buckets,
+here `eu-west-1`; the same table name may actually be used in different
+regions.
+
+Together then, this configuration enables the DynamoDB Metadata Store
+for two buckets with a shared table, while disabling it for the public
+bucket.
+
+
+## S3Guard Command Line Interface (CLI)
+
+Note that, for some commands, an AWS region or `s3a://` URI can be provided.
+
+Metadata store URIs include a scheme that designates the backing store,
+for example `dynamodb://table_name`. As documented above, the
+AWS region can be inferred if the URI to an existing bucket is provided.
+
+
+The S3A URI must also be provided for per-bucket configuration options
+to be picked up. That is: when an s3a URL is provided on the command line,
+all its "resolved" per-bucket settings are used to connect to, authenticate
+with, and configure the S3Guard table. If no such URL is provided, then
+the base settings are picked up.
+
+
+### Create a table: `s3guard init`
+
+```bash
+hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET )
+```
+
+Creates and initializes an empty metadata store.
+
+A DynamoDB metadata store can be initialized with additional parameters
+pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html):
+
+```bash
+[-write PROVISIONED_WRITES] [-read PROVISIONED_READS]
+```
+
+Example 1
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1
+```
+
+Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads,
+in the same location as the bucket "ireland-1".
+
+
+Example 2
+
+```bash
+hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
+```
+
+Creates a table "ireland-team" in the region "eu-west-1".
+
+
+### Import a bucket: `s3guard import`
+
+```bash
+hadoop s3guard import [-meta URI] s3a://BUCKET
+```
+
+Pre-populates a metadata store according to the current contents of an S3
+bucket. If the `-meta` option is omitted, the binding information is taken
+from the `core-site.xml` configuration.
+
+Example
+
+```bash
+hadoop s3guard import s3a://ireland-1
+```
+
+### Audit a table: `s3guard diff`
+
+```bash
+hadoop s3guard diff [-meta URI] s3a://BUCKET
+```
+
+Lists discrepancies between a metadata store and bucket. Note that depending on
+how S3Guard is used, certain discrepancies are to be expected.
+
+Example
+
+```bash
+hadoop s3guard diff s3a://ireland-1
+```
+
+### Delete a table: `s3guard destroy`
+
+
+Deletes a metadata store. With DynamoDB as the store, this means deleting
+the specific DynamoDB table used to store the metadata.
+
+```bash
+hadoop s3guard destroy [-meta URI] ( -region REGION | s3a://BUCKET )
+```
+
+This *does not* delete the bucket, only the S3Guard table which it is bound
+to.
+
+
+Examples
+
+```bash
+hadoop s3guard destroy s3a://ireland-1
+```
+
+Deletes the table which the bucket ireland-1 is configured to use
+as its MetadataStore.
+
+```bash
+hadoop s3guard destroy -meta dynamodb://ireland-team -region eu-west-1
+```
+
+
+
+### Clean up a table: `s3guard prune`
+
+Deletes all file entries in the MetadataStore table whose object "modification
+time" is older than the specified age.
+
+```bash
+hadoop s3guard prune [-days DAYS] [-hours HOURS] [-minutes MINUTES]
+    [-seconds SECONDS] [-m URI] ( -region REGION | s3a://BUCKET )
+```
+
+A time value must be supplied.
+
+1. This does not delete the entries in the bucket itself.
+1. The modification time is effectively the creation time of the objects
+in the S3 bucket.
+1. Even when an S3A URI is supplied, all entries in the table older than
+a specific age are deleted, even those from other buckets.
+
+Example
+
+```bash
+hadoop s3guard prune -days 7 s3a://ireland-1
+```
+
+Deletes all file entries older than seven days from the table associated
+with `s3a://ireland-1`.
+
+```bash
+hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
+```
+
+Deletes all entries more than 90 minutes old from the table "ireland-team" in
+the region "eu-west-1".
+
+
+
+## Debugging and Error Handling
+
+If you run into network connectivity issues, or have a machine failure in the
+middle of an operation, you may end up with your metadata store having state
+that differs from S3. The S3Guard CLI commands, covered in the CLI section
+above, can be used to diagnose and repair these issues.
+
+There are some logs whose log level can be increased to provide more
+information.
+
+```properties
+# Log S3Guard classes
+log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
+
+# Log all S3A classes
+log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
+
+# Enable debug logging of the AWS DynamoDB client
+log4j.logger.com.amazonaws.services.dynamodbv2.AmazonDynamoDB=DEBUG
+
+# Log all HTTP requests made; includes S3 interaction. This may
+# include sensitive information such as account IDs in HTTP headers.
+log4j.logger.com.amazonaws.request=DEBUG
+```
+
+If all else fails, S3Guard is designed to allow for easy recovery by deleting
+the metadata store data. In DynamoDB, this can be accomplished by simply
+deleting the table, and allowing S3Guard to recreate it from scratch. Note
+that S3Guard tracks recent changes to file metadata to implement consistency.
+Deleting the metadata store table will simply result in a period of eventual
+consistency for any file modifications that were made right before the table
+was deleted.
+
+### Failure Semantics
+
+Operations which modify metadata will make changes to S3 first. If, and only
+if, those operations succeed, the equivalent changes will be made to the
+Metadata Store.
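+
+The ordering can be sketched in code. The sketch below is purely illustrative
+and is not the actual S3AFileSystem implementation; the helper class and
+method names are hypothetical, and only the sequencing of the two updates is
+the point:
+
+```java
+import java.io.IOException;
+
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
+
+/** Illustrative only: the "S3 first, Metadata Store second" ordering. */
+final class FailureSemanticsSketch {
+
+  private FailureSemanticsSketch() {
+  }
+
+  static void putAndRecord(AmazonS3 s3, MetadataStore store,
+      PutObjectRequest request, FileStatus status) throws IOException {
+    // 1. The S3 operation runs first; if it throws, the Metadata Store is
+    //    never touched, so the two cannot diverge.
+    s3.putObject(request);
+
+    // 2. Only after S3 reports success is the shadow metadata written.
+    //    If this step fails, the S3 change is *not* rolled back; the
+    //    failure is logged instead.
+    store.put(new PathMetadata(status));
+  }
+}
+```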
+
+These changes to S3 and the Metadata Store are not fully transactional: if the
+S3 operations succeed, and the subsequent Metadata Store updates fail, the S3
+changes will *not* be rolled back. In this case, an error message will be
+logged.
+
+### Versioning
+
+S3Guard tables are created with a version marker, an entry with the primary
+key and child entry of `../VERSION`; the use of a relative path guarantees
+that it will not be resolved.
+
+#### Versioning policy
+
+1. The version number of an S3Guard table will only be incremented when
+an incompatible change is made to the table structure; that is, the structure
+has changed so that it is no longer readable by older versions, or because
+it has added new mandatory fields which older versions do not create.
+1. The version number of S3Guard tables will only be changed by incrementing
+the value.
+1. Updated versions of S3Guard MAY continue to support older version tables.
+1. If an incompatible change is made such that existing tables are not compatible,
+then a means shall be provided to update existing tables. For example:
+an option in the Command Line Interface, or an option to upgrade tables
+during S3Guard initialization.
+
+*Note*: this policy does not indicate any intent to upgrade table structures
+in an incompatible manner. The version marker in tables exists to support
+such an option if it ever becomes necessary, by ensuring that all S3Guard
+clients can recognise any version mismatch.
+
+### Security
+
+All users of the DynamoDB table must have write access to it. This
+effectively means they must have write access to the entire object store.
+
+There has not been much testing of using an S3Guard Metadata Store
+with a read-only S3 bucket. It *should* work, provided all users
+have write access to the DynamoDB table. And, as updates to the Metadata Store
+are only made after successful file creation, deletion and rename, the
+store is *unlikely* to get out of sync; even so, it is still something which
+merits more testing before it could be considered reliable.
+
+### Troubleshooting
+
+#### Error: `S3Guard table lacks version marker.`
+
+The table which was intended to be used as an S3Guard metadata store
+does not have any version marker indicating that it is an S3Guard table.
+
+It may be that this is not an S3Guard table.
+
+* Make sure that this is the correct table name.
+* Delete the table, so it can be rebuilt.
+
+#### Error: `Database table is from an incompatible S3Guard version`
+
+This indicates that the version of S3Guard which created (or possibly updated)
+the database table is from a different version than that expected by the S3A
+client.
+
+This error will also include the expected and actual version numbers.
+
+If the expected version is lower than the actual version, then the version
+of the S3A client library is too old to interact with this S3Guard-managed
+bucket. Upgrade the application/library.
+
+If the expected version is higher than the actual version, then the table
+itself will need upgrading.
+
+#### Error: `DynamoDB table TABLE does not exist in region REGION; auto-creation is turned off`
+
+S3Guard could not find the DynamoDB table for the Metadata Store,
+and it was not configured to create it. Either the table was missing,
+or the configuration is preventing S3Guard from finding the table.
+
+1. Verify that the value of `fs.s3a.s3guard.ddb.table` is correct.
+1. 
If the region for an existing table has been set in +`fs.s3a.s3guard.ddb.region`, verify that the value is correct. +1. If the region is not set, verify that the table exists in the same +region as the bucket being used. +1. Create the table if necessary. --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
