MAPREDUCE-7101. Add config parameter to allow JHS to always scan user dir
irrespective of modTime. (Thomas Marquardt via asuresh)


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/5670e89b
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/5670e89b
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/5670e89b

Branch: refs/heads/HADOOP-15461
Commit: 5670e89b2ec69ab71e32dcd5acbd3a57ca6abea5
Parents: aeaf9fe
Author: Arun Suresh <asur...@apache.org>
Authored: Tue Jun 12 15:36:52 2018 -0700
Committer: Arun Suresh <asur...@apache.org>
Committed: Tue Jun 12 15:36:52 2018 -0700

----------------------------------------------------------------------
 .../hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java       | 9 +++++++--
 .../src/main/resources/mapred-default.xml                   | 9 +++++++++
 .../apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java   | 8 +++++++-
 3 files changed, 23 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/5670e89b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
index 1cadf84..9e964e1 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JHAdminConfig.java
@@ -61,8 +61,13 @@ public class JHAdminConfig {
     MR_HISTORY_PREFIX + "cleaner.interval-ms";
   public static final long DEFAULT_MR_HISTORY_CLEANER_INTERVAL_MS = 
     1 * 24 * 60 * 60 * 1000l; //1 day
-  
-  
+
+  /** Always scan user dir, irrespective of dir modification time.*/
+  public static final String MR_HISTORY_ALWAYS_SCAN_USER_DIR =
+      MR_HISTORY_PREFIX + "always-scan-user-dir";
+  public static final boolean DEFAULT_MR_HISTORY_ALWAYS_SCAN_USER_DIR =
+      false;
+
   /** The number of threads to handle client API requests.*/
   public static final String MR_HISTORY_CLIENT_THREAD_COUNT = 
     MR_HISTORY_PREFIX + "client.thread-count";

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5670e89b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
index dcb312c..9f33d65 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml
@@ -1775,6 +1775,15 @@
 </property>
 
 <property>
+  <name>mapreduce.jobhistory.always-scan-user-dir</name>
+  <value>false</value>
+  <description>Some Cloud FileSystems do not currently update the
+  modification time of directories. To support these filesystems, this
+  configuration value should be set to 'true'.
+  </description>
+</property>
+
+<property>
   <name>mapreduce.jobhistory.done-dir</name>
   <value>${yarn.app.mapreduce.am.staging-dir}/history/done</value>
   <description></description>

http://git-wip-us.apache.org/repos/asf/hadoop/blob/5670e89b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
----------------------------------------------------------------------
diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
index a07ca26..7fe99a2 100644
--- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
+++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
@@ -324,7 +324,13 @@ public class HistoryFileManager extends AbstractService {
       // so we need to have additional check.
       // Note: modTime (X second Y millisecond) could be casted to X second or
       // X+1 second.
-      if (modTime != newModTime
+      // MAPREDUCE-7101: Some Cloud FileSystems do not currently update the
+      // modification time of directories. For these, we scan every time if
+      // the 'alwaysScan' is true.
+      boolean alwaysScan = conf.getBoolean(
+          JHAdminConfig.MR_HISTORY_ALWAYS_SCAN_USER_DIR,
+          JHAdminConfig.DEFAULT_MR_HISTORY_ALWAYS_SCAN_USER_DIR);
+      if (alwaysScan || modTime != newModTime
           || (scanTime/1000) == (modTime/1000)
           || (scanTime/1000 + 1) == (modTime/1000)) {
         // reset scanTime before scanning happens


---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-commits-h...@hadoop.apache.org

Reply via email to