umamaheswararao commented on a change in pull request #2229: URL: https://github.com/apache/hadoop/pull/2229#discussion_r471048505
########## File path: hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/ViewDistributedFileSystem.java ########## @@ -0,0 +1,1864 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * <p> + * http://www.apache.org/licenses/LICENSE-2.0 + * <p> + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs; + +import com.google.common.base.Preconditions; +import org.apache.hadoop.HadoopIllegalArgumentException; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.crypto.key.KeyProvider; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.BlockStoragePolicySpi; +import org.apache.hadoop.fs.CacheFlag; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileChecksum; +import org.apache.hadoop.fs.FileEncryptionInfo; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FsServerDefaults; +import org.apache.hadoop.fs.FsStatus; +import org.apache.hadoop.fs.LocatedFileStatus; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.PartialListing; +import 
org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.fs.PathHandle; +import org.apache.hadoop.fs.QuotaUsage; +import org.apache.hadoop.fs.RemoteIterator; +import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.fs.XAttrSetFlag; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.fs.viewfs.ViewFileSystem; +import org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme; +import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; +import org.apache.hadoop.hdfs.protocol.AddErasureCodingPolicyResponse; +import org.apache.hadoop.hdfs.protocol.BlockStoragePolicy; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; +import org.apache.hadoop.hdfs.protocol.CachePoolEntry; +import org.apache.hadoop.hdfs.protocol.CachePoolInfo; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.ECTopologyVerifierResult; +import org.apache.hadoop.hdfs.protocol.EncryptionZone; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; +import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; +import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.HdfsPathHandle; +import org.apache.hadoop.hdfs.protocol.OpenFileEntry; +import org.apache.hadoop.hdfs.protocol.OpenFilesIterator; +import org.apache.hadoop.hdfs.protocol.RollingUpgradeInfo; +import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport; +import org.apache.hadoop.hdfs.protocol.SnapshotDiffReportListing; +import org.apache.hadoop.hdfs.protocol.SnapshottableDirectoryStatus; +import org.apache.hadoop.hdfs.protocol.ZoneReencryptionStatus; +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; +import 
org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.token.DelegationTokenIssuer; +import org.apache.hadoop.security.token.Token; +import org.apache.hadoop.util.Progressable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileNotFoundException; +import java.io.IOException; + +import java.net.InetSocketAddress; +import java.net.URI; +import java.util.ArrayList; +import java.util.Collection; +import java.util.EnumSet; +import java.util.List; +import java.util.Map; + +/** + * The ViewDistributedFileSystem is an extended class to DistributedFileSystem + * with additional mounting functionality. The goal is to have better API + * compatibility for HDFS users when using mounting + * filesystem(ViewFileSystemOverloadScheme). + * The ViewFileSystemOverloadScheme{@link ViewFileSystemOverloadScheme} is a new + * filesystem with inherited mounting functionality from ViewFileSystem. + * For the user who is using ViewFileSystemOverloadScheme by setting + * fs.hdfs.impl=org.apache.hadoop.fs.viewfs.ViewFileSystemOverloadScheme, now + * they can set fs.hdfs.impl=org.apache.hadoop.hdfs.ViewDistributedFileSystem. + * So, that the hdfs users will get closely compatible API with mount + * functionality. For the rest of all other schemes can continue to use + * ViewFileSystemOverloadScheme class directly for mount functionality. Please + * note that ViewFileSystemOverloadScheme provides only + * ViewFileSystem{@link ViewFileSystem} APIs. + * If user configured this class but no mount point configured? Then it will + * simply work as existing DistributedFileSystem class. If user configured both + * fs.hdfs.impl to this class and mount configurations, then users will be able + * to make calls the APIs available in this class, they are nothing but DFS + * APIs, but they will be delegated to viewfs functionality. 
Please note, APIs + * without any path in arguments( ex: isInSafeMode), will be delegated to + * default filesystem only, that is the configured fallback link. If you want to + * make these API calls on specific child filesystem, you may want to initialize + * them separately and call. In ViewDistributedFileSystem, linkFallBack is + * mandatory when you add mount links and it must be to your base cluster, + * usually your current fs.defaultFS if that's pointing to hdfs. + */ +public class ViewDistributedFileSystem extends DistributedFileSystem { + private static final Logger LOGGER = + LoggerFactory.getLogger(ViewDistributedFileSystem.class); + + // A mounting file system. + private ViewFileSystemOverloadScheme vfs; + // A default DFS, which should have set via linkFallback + private DistributedFileSystem defaultDFS; + + @Override + public void initialize(URI uri, Configuration conf) throws IOException { + super.initialize(uri, conf); + try { + this.vfs = tryInitializeMountingViewFs(uri, conf); + } catch (IOException ioe) { + LOGGER.debug( + "Mount tree initialization failed with the reason => {}. Falling" + + " back to regular DFS initialization. Please" + " re-initialize" + + " the fs after updating mount point.", + ioe.getMessage()); + // Re-initialize, so that initDFSClient will initialize DFSClient to work + // same as DistributedFileSystem. + super.initialize(uri, conf); + return; + } + setConf(conf); + // A child DFS with the current initialized URI. This must be same as + // fallback fs. The fallback must point to root of your filesystems. + // Some APIs(without path in argument, for example isInSafeMode) will + // support only for base cluster filesystem. Only that APIs will use this + // fs. + defaultDFS = (DistributedFileSystem) this.vfs.getFallbackFileSystem(); + Preconditions + .checkNotNull("In ViewHDFS fallback link is mandatory.", defaultDFS); + // Please don't access internal dfs directly except in tests. 
Review comment: The reason I tried to make this mandatory was that there are a few APIs that we currently delegate to the default fs. I am just worried that we will have too many behaviors. :-) Let me think on it. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
