Author: tucu Date: Thu Jan 16 17:12:31 2014 New Revision: 1558853 URL: http://svn.apache.org/r1558853 Log: MAPREDUCE-5724. JobHistoryServer does not start if HDFS is not running. (tucu)
Added: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java - copied unchanged from r1558852, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestHistoryFileManager.java Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt?rev=1558853&r1=1558852&r2=1558853&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/CHANGES.txt Thu Jan 16 17:12:31 2014 @@ -126,6 +126,9 @@ Release 2.4.0 - UNRELEASED MAPREDUCE-5689. MRAppMaster does not preempt reducers when scheduled maps cannot be fulfilled. (lohit via kasha) + MAPREDUCE-5724. JobHistoryServer does not start if HDFS is not running. 
+ (tucu) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java?rev=1558853&r1=1558852&r2=1558853&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java Thu Jan 16 17:12:31 2014 @@ -78,6 +78,13 @@ public class JHAdminConfig { MR_HISTORY_PREFIX + "done-dir"; /** + * Maximum time the History server will wait for the FileSystem for History + * files to become available. Default value is -1, forever. + */ + public static final String MR_HISTORY_MAX_START_WAIT_TIME = + MR_HISTORY_PREFIX + "maximum-start-wait-time-millis"; + public static final long DEFAULT_MR_HISTORY_MAX_START_WAIT_TIME = -1; + /** * Path where history files should be stored after a job finished and before * they are pulled into the job history server. 
**/ Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml?rev=1558853&r1=1558852&r2=1558853&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml (original) +++ hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml Thu Jan 16 17:12:31 2014 @@ -35,6 +35,10 @@ <dependencies> <dependency> <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-mapreduce-client-common</artifactId> </dependency> <dependency> @@ -53,6 +57,12 @@ <type>test-jar</type> <scope>test</scope> </dependency> + <dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-hdfs</artifactId> + <type>test-jar</type> + <scope>test</scope> + </dependency> </dependencies> <build> Modified: hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java?rev=1558853&r1=1558852&r2=1558853&view=diff ============================================================================== --- hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java (original) +++ 
hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java Thu Jan 16 17:12:31 2014 @@ -20,6 +20,7 @@ package org.apache.hadoop.mapreduce.v2.h import java.io.FileNotFoundException; import java.io.IOException; +import java.net.ConnectException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -69,6 +70,8 @@ import org.apache.hadoop.yarn.exceptions import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.apache.hadoop.yarn.util.Clock; +import org.apache.hadoop.yarn.util.SystemClock; /** * This class provides a way to interact with history files in a thread safe @@ -464,7 +467,8 @@ public class HistoryFileManager extends private JobACLsManager aclsMgr; - private Configuration conf; + @VisibleForTesting + Configuration conf; private String serialNumberFormat; @@ -491,36 +495,10 @@ public class HistoryFileManager extends + (JobHistoryUtils.SERIAL_NUMBER_DIRECTORY_DIGITS + serialNumberLowDigits) + "d"); - String doneDirPrefix = null; - doneDirPrefix = JobHistoryUtils - .getConfiguredHistoryServerDoneDirPrefix(conf); - try { - doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified( - new Path(doneDirPrefix)); - doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf); - doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK); - mkdir(doneDirFc, doneDirPrefixPath, new FsPermission( - JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION)); - } catch (IOException e) { - throw new YarnRuntimeException("Error creating done directory: [" - + doneDirPrefixPath + "]", e); - } - - String intermediateDoneDirPrefix = null; - intermediateDoneDirPrefix = JobHistoryUtils - .getConfiguredHistoryIntermediateDoneDirPrefix(conf); - try { - intermediateDoneDirPath = FileContext.getFileContext(conf).makeQualified( - new 
Path(intermediateDoneDirPrefix)); - intermediateDoneDirFc = FileContext.getFileContext( - intermediateDoneDirPath.toUri(), conf); - mkdir(intermediateDoneDirFc, intermediateDoneDirPath, new FsPermission( - JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort())); - } catch (IOException e) { - LOG.info("error creating done directory on dfs " + e); - throw new YarnRuntimeException("Error creating intermediate done directory: [" - + intermediateDoneDirPath + "]", e); - } + long maxFSWaitTime = conf.getLong( + JHAdminConfig.MR_HISTORY_MAX_START_WAIT_TIME, + JHAdminConfig.DEFAULT_MR_HISTORY_MAX_START_WAIT_TIME); + createHistoryDirs(new SystemClock(), 10 * 1000, maxFSWaitTime); this.aclsMgr = new JobACLsManager(conf); @@ -544,6 +522,107 @@ public class HistoryFileManager extends super.serviceInit(conf); } + @VisibleForTesting + void createHistoryDirs(Clock clock, long intervalCheckMillis, + long timeOutMillis) throws IOException { + long start = clock.getTime(); + boolean done = false; + int counter = 0; + while (!done && + ((timeOutMillis == -1) || (clock.getTime() - start < timeOutMillis))) { + done = tryCreatingHistoryDirs(counter++ % 3 == 0); // log every 3 attempts, 30sec + try { + Thread.sleep(intervalCheckMillis); + } catch (InterruptedException ex) { + throw new YarnRuntimeException(ex); + } + } + if (!done) { + throw new YarnRuntimeException("Timed out '" + timeOutMillis+ + "ms' waiting for FileSystem to become available"); + } + } + + /** + * DistributedFileSystem returns a RemoteException with a message stating + * SafeModeException in it. So this is the only way to check it is because of + * being in safe mode. + */ + private boolean isBecauseSafeMode(Throwable ex) { + return ex.toString().contains("SafeModeException"); + } + + /** + * Returns TRUE if the history dirs were created, FALSE if they could not + * be created because the FileSystem is not reachable or in safe mode and + * throws an exception otherwise. 
+ */ + @VisibleForTesting + boolean tryCreatingHistoryDirs(boolean logWait) throws IOException { + boolean succeeded = true; + String doneDirPrefix = JobHistoryUtils. + getConfiguredHistoryServerDoneDirPrefix(conf); + try { + doneDirPrefixPath = FileContext.getFileContext(conf).makeQualified( + new Path(doneDirPrefix)); + doneDirFc = FileContext.getFileContext(doneDirPrefixPath.toUri(), conf); + doneDirFc.setUMask(JobHistoryUtils.HISTORY_DONE_DIR_UMASK); + mkdir(doneDirFc, doneDirPrefixPath, new FsPermission( + JobHistoryUtils.HISTORY_DONE_DIR_PERMISSION)); + } catch (ConnectException ex) { + if (logWait) { + LOG.info("Waiting for FileSystem at " + + doneDirPrefixPath.toUri().getAuthority() + " to be available"); + } + succeeded = false; + } catch (IOException e) { + if (isBecauseSafeMode(e)) { + succeeded = false; + if (logWait) { + LOG.info("Waiting for FileSystem at " + + doneDirPrefixPath.toUri().getAuthority() + + " to be out of safe mode"); + } + } else { + throw new YarnRuntimeException("Error creating done directory: [" + + doneDirPrefixPath + "]", e); + } + } + if (succeeded) { + String intermediateDoneDirPrefix = JobHistoryUtils. 
+ getConfiguredHistoryIntermediateDoneDirPrefix(conf); + try { + intermediateDoneDirPath = FileContext.getFileContext(conf).makeQualified( + new Path(intermediateDoneDirPrefix)); + intermediateDoneDirFc = FileContext.getFileContext( + intermediateDoneDirPath.toUri(), conf); + mkdir(intermediateDoneDirFc, intermediateDoneDirPath, new FsPermission( + JobHistoryUtils.HISTORY_INTERMEDIATE_DONE_DIR_PERMISSIONS.toShort())); + } catch (ConnectException ex) { + succeeded = false; + if (logWait) { + LOG.info("Waiting for FileSystem at " + + intermediateDoneDirPath.toUri().getAuthority() + + " to be available"); + } + } catch (IOException e) { + if (isBecauseSafeMode(e)) { + succeeded = false; + if (logWait) { + LOG.info("Waiting for FileSystem at " + + intermediateDoneDirPath.toUri().getAuthority() + + " to be out of safe mode"); + } + } else { + throw new YarnRuntimeException( + "Error creating intermediate done directory: [" + + intermediateDoneDirPath + "]", e); + } + } + } + return succeeded; + } + @Override public void serviceStop() throws Exception { ShutdownThreadsHelper.shutdownExecutorService(moveToDoneExecutor);