Author: yhemanth
Date: Fri Jan 16 16:48:21 2009
New Revision: 735191

URL: http://svn.apache.org/viewvc?rev=735191&view=rev
Log:
HADOOP-5022. Provide an option to remove all log files older than the 
configured time via logcondense. Contributed by Peeyush Bishnoi.

Modified:
    hadoop/core/trunk/src/contrib/hod/CHANGES.txt
    hadoop/core/trunk/src/contrib/hod/support/logcondense.py
    hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml

Modified: hadoop/core/trunk/src/contrib/hod/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/CHANGES.txt?rev=735191&r1=735190&r2=735191&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/CHANGES.txt (original)
+++ hadoop/core/trunk/src/contrib/hod/CHANGES.txt Fri Jan 16 16:48:21 2009
@@ -4,6 +4,10 @@
 
   INCOMPATIBLE CHANGES
 
+    HADOOP-5022. Provide an option to remove all log files older
+    than the configured time via logcondense.
+    (Peeyush Bishnoi via yhemanth)
+
   NEW FEATURES
 
   IMPROVEMENTS

Modified: hadoop/core/trunk/src/contrib/hod/support/logcondense.py
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/support/logcondense.py?rev=735191&r1=735190&r2=735191&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/support/logcondense.py (original)
+++ hadoop/core/trunk/src/contrib/hod/support/logcondense.py Fri Jan 16 16:48:21 2009
@@ -95,9 +95,18 @@
             'dest'    : "dynamicdfs",
             'metavar' : " ",
             'default' : "false",
-            'help'    : "'true', if the cluster is used to bring up dynamic dfs clusters, 'false' otherwise"}
+            'help'    : "'true', if the cluster is used to bring up dynamic dfs clusters, 'false' otherwise"},
+              
+           {'short'   : "-r",
+            'long'    : "--retain-master-logs",
+            'type'    : "string",
+            'action'  : "store",
+            'dest'    : "retain_masters_logs",
+            'metavar' : " ",
+            'default' : "false",
+            'help'    : "'true' if the logs of the masters (jobtracker, and namenode if '--dynamicdfs' is set) have to be retained, 'false' if everything has to be removed"}
            )
-
+ 
 def getDfsCommand(options, args):
   if (options.config == None): 
     cmd = options.package + " " + "dfs " + args
@@ -109,12 +118,19 @@
   import shutil
   
   options = process_args()
-  # if the cluster is used to bring up dynamic dfs, we must leave NameNode and JobTracker logs,
-  # otherwise only JobTracker logs. Likewise, in case of dynamic dfs, we must also look for
-  # deleting datanode logs
-  filteredNames = ['jobtracker']
-  deletedNamePrefixes = ['*-tasktracker-*']
-  if options.dynamicdfs == 'true':
+ 
+  # If the retain-master-logs option is true, we do not delete
+  # the jobtracker logs, nor, in the case of dynamic dfs, the
+  # namenode logs. Otherwise, we delete the entire job directory,
+  # as nothing other than master and slave log files should be
+  # under the hod-logs directory.
+  filteredNames = [] # logs to skip while deleting
+  deletedNamePrefixes = [] # log prefixes to delete.
+  if options.retain_masters_logs == 'true':
+    filteredNames = ['jobtracker']
+    deletedNamePrefixes = ['*-tasktracker-*']
+
+  if options.dynamicdfs == 'true' and options.retain_masters_logs == 'true':
     filteredNames.append('namenode')
     deletedNamePrefixes.append('*-datanode-*')
 
@@ -167,13 +183,21 @@
 
   for job in toPurge.keys():
     try:
-      for prefix in deletedNamePrefixes:
-        cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix)
+      if options.retain_masters_logs == 'false':
+        # delete entire job-id directory.
+        cmd = getDfsCommand(options, "-rmr " + toPurge[job])
         print cmd
-        ret = 0
         ret = os.system(cmd)
         if (ret != 0):
-          print >> sys.stderr, "Command failed to delete file " + cmd 
+          print >> sys.stderr, "Command failed to delete job directory " + cmd
+      else:
+        # delete only the prefixes we're interested in.
+        for prefix in deletedNamePrefixes:
+          cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix)
+          print cmd
+          ret = os.system(cmd)
+          if (ret != 0):
+            print >> sys.stderr, "Command failed to delete file " + cmd 
     except Exception, e:
       print >> sys.stderr, e
          

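For a concrete picture of the new control flow, here is a minimal sketch
(Python 3; the function name plan_deletions, the example job path, and
returning command strings instead of running them through os.system are
illustrative assumptions, not the committed code):

  # Hedged sketch of the decision logic logcondense.py applies per job
  # directory after this change.
  def plan_deletions(job_dir, retain_masters_logs=False, dynamicdfs=False):
      """Return the dfs commands that would be issued for one job directory."""
      if not retain_masters_logs:
          # New default: remove the entire job directory, master logs included.
          return ["dfs -rmr " + job_dir]
      # Retain the jobtracker (and, for dynamic dfs, namenode) logs;
      # delete only the slave logs, prefix by prefix.
      prefixes = ["*-tasktracker-*"]
      if dynamicdfs:
          prefixes.append("*-datanode-*")
      return ["dfs -rm " + job_dir + "/" + p for p in prefixes]

  if __name__ == "__main__":
      # Hypothetical job directory under hod-logs.
      for cmd in plan_deletions("/user/hod/hod-logs/123",
                                retain_masters_logs=True, dynamicdfs=True):
          print(cmd)

The behavioral change worth noting is the default: with --retain-master-logs
left at 'false', the script now issues a single recursive -rmr on the job
directory instead of per-prefix -rm calls, so master logs are removed as well.
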
Modified: hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml?rev=735191&r1=735190&r2=735191&view=diff
==============================================================================
--- hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml (original)
+++ hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml Fri Jan 16 16:48:21 2009
@@ -312,6 +312,16 @@
               logs - a scenario that may be common in test clusters.</td>
               <td>false</td>
             </tr>
+            <tr>
+              <td>-r</td>
+              <td>--retain-master-logs</td>
+              <td>If true, this will keep the JobTracker logs of a job in hod-logs inside HDFS and
+              will delete only the TaskTracker logs. In addition, if the 'dynamicdfs' option is set
+              to true, this will keep the NameNode logs along with the JobTracker logs and will
+              delete only the DataNode logs. Otherwise, it will delete the complete job directory
+              from hod-logs inside HDFS. By default it is set to false.</td>
+              <td>false</td>
+            </tr>
           </table>
        <p>So, for example, to delete all log files older than 7 days using a hadoop-site.xml stored in
        ~/hadoop-conf, using the hadoop installation under ~/hadoop-0.17.0, you could say:</p>
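
For illustration, an invocation consistent with the options described in this
diff might look like the following (a hedged sketch: the -p, -d, -c and -l
flags and the paths are taken from the surrounding guide text and are
assumptions here, not part of this change):

  python logcondense.py -p ~/hadoop-0.17.0/bin/hadoop -d 7 -c ~/hadoop-conf -l /user

and, to keep the JobTracker (and NameNode) logs while deleting only the slave
logs, the same command with the option added by this change:

  python logcondense.py -p ~/hadoop-0.17.0/bin/hadoop -d 7 -c ~/hadoop-conf -l /user -r true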

