Author: arp
Date: Tue Oct 29 16:44:53 2013
New Revision: 1536801

URL: http://svn.apache.org/r1536801
Log:
Merging r1536182 through r1536558 from trunk to branch HDFS-2832

Added:
    hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestJobHistoryUtils.java
      - copied unchanged from r1536558, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/test/java/org/apache/hadoop/mapreduce/v2/jobhistory/TestJobHistoryUtils.java
Modified:
    hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/   (props changed)
    hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/CHANGES.txt   (contents, props changed)
    hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java
    hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
    hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistory.java

Propchange: hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/
------------------------------------------------------------------------------
  Merged /hadoop/common/trunk/hadoop-mapreduce-project:r1536182-1536558

Modified: hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/CHANGES.txt?rev=1536801&r1=1536800&r2=1536801&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/CHANGES.txt Tue Oct 29 16:44:53 2013
@@ -214,6 +214,9 @@ Release 2.2.1 - UNRELEASED
 
   OPTIMIZATIONS
 
+    MAPREDUCE-4680. Job history cleaner should only check timestamps of files in
+    old enough directories (Robert Kanter via Sandy Ryza)
+
   BUG FIXES
 
     MAPREDUCE-5569. FloatSplitter is not generating correct splits (Nathan

Propchange: hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/CHANGES.txt
------------------------------------------------------------------------------
  Merged /hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt:r1536182-1536558

Modified: hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java?rev=1536801&r1=1536800&r2=1536801&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java (original)
+++ hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/src/main/java/org/apache/hadoop/mapreduce/v2/jobhistory/JobHistoryUtils.java Tue Oct 29 16:44:53 2013
@@ -21,6 +21,7 @@ package org.apache.hadoop.mapreduce.v2.j
 import java.io.File;
 import java.io.IOException;
 import java.util.Calendar;
+import java.util.ArrayList;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.concurrent.atomic.AtomicBoolean;
@@ -499,4 +500,72 @@ public class JobHistoryUtils {
     return fc.makeQualified(JobHistoryUtils.getStagingJobHistoryFile(
         histDirPath,jobId, (applicationAttemptId.getAttemptId() - 1)));
   }
+
+  /**
+   * Looks for the dirs to clean.  The folder structure is YYYY/MM/DD/Serial so
+   * we can use that to more efficiently find the directories to clean by
+   * comparing the cutoff timestamp with the timestamp from the folder
+   * structure.
+   *
+   * @param fc done dir FileContext
+   * @param root folder for completed jobs
+   * @param cutoff The cutoff for the max history age
+   * @return The list of directories for cleaning
+   * @throws IOException
+   */
+  public static List<FileStatus> getHistoryDirsForCleaning(FileContext fc,
+      Path root, long cutoff) throws IOException {
+    List<FileStatus> fsList = new ArrayList<FileStatus>();
+    Calendar cCal = Calendar.getInstance();
+    cCal.setTimeInMillis(cutoff);
+    int cYear = cCal.get(Calendar.YEAR);
+    int cMonth = cCal.get(Calendar.MONTH) + 1;
+    int cDate = cCal.get(Calendar.DATE);
+
+    RemoteIterator<FileStatus> yearDirIt = fc.listStatus(root);
+    while (yearDirIt.hasNext()) {
+      FileStatus yearDir = yearDirIt.next();
+      try {
+        int year = Integer.parseInt(yearDir.getPath().getName());
+        if (year <= cYear) {
+          RemoteIterator<FileStatus> monthDirIt =
+              fc.listStatus(yearDir.getPath());
+          while (monthDirIt.hasNext()) {
+            FileStatus monthDir = monthDirIt.next();
+            try {
+              int month = Integer.parseInt(monthDir.getPath().getName());
+              // If we only checked the month here, then something like 07/2013
+              // would incorrectly not pass when the cutoff is 06/2014
+              if (year < cYear || month <= cMonth) {
+                RemoteIterator<FileStatus> dateDirIt =
+                    fc.listStatus(monthDir.getPath());
+                while (dateDirIt.hasNext()) {
+                  FileStatus dateDir = dateDirIt.next();
+                  try {
+                    int date = Integer.parseInt(dateDir.getPath().getName());
+                    // If we only checked the date here, then something like
+                    // 07/21/2013 would incorrectly not pass when the cutoff is
+                    // 08/20/2013 or 07/20/2012
+                    if (year < cYear || month < cMonth || date <= cDate) {
+                      fsList.addAll(remoteIterToList(
+                          fc.listStatus(dateDir.getPath())));
+                    }
+                  } catch (NumberFormatException nfe) {
+                    // the directory didn't fit the format we're looking for so
+                    // skip the dir
+                  }
+                }
+              }
+            } catch (NumberFormatException nfe) {
+              // the directory didn't fit the format we're looking for so skip
+              // the dir
+            }
+          }
+        }
+      } catch (NumberFormatException nfe) {
+        // the directory didn't fit the format we're looking for so skip the dir
+      }
+    }
+    return fsList;
+  }
 }

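For context, the three nested checks in getHistoryDirsForCleaning above reduce to a single year/month/day comparison against the cutoff. Below is a minimal standalone sketch of that combined comparison; the class and method names are illustrative only and are not part of the patch.

import java.util.Calendar;

public class HistoryDirPruneSketch {
  // True if a YYYY/MM/DD directory could contain history files at or before
  // the cutoff; this mirrors the combined effect of the three nested checks
  // in JobHistoryUtils.getHistoryDirsForCleaning.
  static boolean mayNeedCleaning(int year, int month, int date, long cutoff) {
    Calendar cCal = Calendar.getInstance();
    cCal.setTimeInMillis(cutoff);
    int cYear = cCal.get(Calendar.YEAR);
    int cMonth = cCal.get(Calendar.MONTH) + 1;  // Calendar months are 0-based
    int cDate = cCal.get(Calendar.DATE);
    return year < cYear
        || (year == cYear && (month < cMonth
            || (month == cMonth && date <= cDate)));
  }

  public static void main(String[] args) {
    // A directory from July 2013 is a cleaning candidate when the cutoff falls
    // in June 2014, the case the in-code comments call out.
    Calendar cutoff = Calendar.getInstance();
    cutoff.set(2014, Calendar.JUNE, 15);
    System.out.println(mayNeedCleaning(2013, 7, 21, cutoff.getTimeInMillis())); // true
  }
}
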
Modified: hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java?rev=1536801&r1=1536800&r2=1536801&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java (original)
+++ hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryFileManager.java Tue Oct 29 16:44:53 2013
@@ -924,6 +924,11 @@ public class HistoryFileManager extends 
     fileInfo.delete();
   }
 
+  List<FileStatus> getHistoryDirsForCleaning(long cutoff) throws IOException {
+      return JobHistoryUtils.
+        getHistoryDirsForCleaning(doneDirFc, doneDirPrefixPath, cutoff);
+  }
+
   /**
    * Clean up older history files.
    * 
@@ -932,12 +937,9 @@ public class HistoryFileManager extends 
    */
   @SuppressWarnings("unchecked")
   void clean() throws IOException {
-    // TODO this should be replaced by something that knows about the directory
-    // structure and will put less of a load on HDFS.
     long cutoff = System.currentTimeMillis() - maxHistoryAge;
     boolean halted = false;
-    // TODO Delete YYYY/MM/DD directories.
-    List<FileStatus> serialDirList = findTimestampedDirectories();
+    List<FileStatus> serialDirList = getHistoryDirsForCleaning(cutoff);
     // Sort in ascending order. Relies on YYYY/MM/DD/Serial
     Collections.sort(serialDirList);
     for (FileStatus serialDir : serialDirList) {

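The Collections.sort(serialDirList) call retained in clean() relies on the YYYY/MM/DD/Serial layout of the done directory: with zero-padded components, lexicographic path order is also chronological order. A small illustrative sketch; the example paths below are hypothetical, not taken from the patch.

import java.util.Arrays;

public class SerialDirSortSketch {
  public static void main(String[] args) {
    // Hypothetical done-dir paths in YYYY/MM/DD/Serial form; sorting the
    // strings lexicographically yields chronological order, which is what
    // sorting the FileStatus list by path depends on.
    String[] serialDirs = {
        "done/2013/10/29/000001",
        "done/2012/07/21/000000",
        "done/2013/06/01/000000"
    };
    Arrays.sort(serialDirs);
    System.out.println(Arrays.toString(serialDirs));
    // -> [done/2012/07/21/000000, done/2013/06/01/000000, done/2013/10/29/000001]
  }
}
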
Modified: hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistory.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistory.java?rev=1536801&r1=1536800&r2=1536801&view=diff
==============================================================================
--- hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistory.java (original)
+++ hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistory.java Tue Oct 29 16:44:53 2013
@@ -37,6 +37,7 @@ import org.apache.hadoop.mapreduce.v2.jo
 import org.apache.hadoop.mapreduce.v2.jobhistory.JobHistoryUtils;
 import org.junit.After;
 import org.junit.Test;
+import org.mockito.Mockito;
 
 import static org.junit.Assert.assertEquals;
 import static org.mockito.Mockito.*;
@@ -175,7 +176,8 @@ public class TestJobHistory {
     doReturn(list2).when(historyManager).scanDirectoryForHistoryFiles(
         eq(donePathToday), any(FileContext.class));
 
-    doReturn(fileStatusList).when(historyManager).findTimestampedDirectories();
+    doReturn(fileStatusList).when(historyManager)
+        .getHistoryDirsForCleaning(Mockito.anyLong());
     doReturn(true).when(historyManager).deleteDir(any(FileStatus.class));
 
     JobListCache jobListCache = mock(JobListCache.class);

