Author: yhemanth
Date: Fri Jan 16 16:48:21 2009
New Revision: 735191
URL: http://svn.apache.org/viewvc?rev=735191&view=rev
Log:
HADOOP-5022. Provide an option to remove all log files older than the
configured time via logcondense. Contributed by Peeyush Bishnoi.
Modified:
hadoop/core/trunk/src/contrib/hod/CHANGES.txt
hadoop/core/trunk/src/contrib/hod/support/logcondense.py
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml
Modified: hadoop/core/trunk/src/contrib/hod/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/CHANGES.txt?rev=735191&r1=735190&r2=735191&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/CHANGES.txt (original)
+++ hadoop/core/trunk/src/contrib/hod/CHANGES.txt Fri Jan 16 16:48:21 2009
@@ -4,6 +4,10 @@
INCOMPATIBLE CHANGES
+ HADOOP-5022. Provide an option to remove all log files older
+ than the configured time via logcondense.
+ (Peeyush Bishnoi via yhemanth)
+
NEW FEATURES
IMPROVEMENTS
Modified: hadoop/core/trunk/src/contrib/hod/support/logcondense.py
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hod/support/logcondense.py?rev=735191&r1=735190&r2=735191&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hod/support/logcondense.py (original)
+++ hadoop/core/trunk/src/contrib/hod/support/logcondense.py Fri Jan 16
16:48:21 2009
@@ -95,9 +95,18 @@
'dest' : "dynamicdfs",
'metavar' : " ",
'default' : "false",
- 'help' : "'true', if the cluster is used to bring up dynamic
dfs clusters, 'false' otherwise"}
+ 'help' : "'true', if the cluster is used to bring up dynamic
dfs clusters, 'false' otherwise"},
+
+ {'short' : "-r",
+ 'long' : "--retain-master-logs",
+ 'type' : "string",
+ 'action' : "store",
+ 'dest' : "retain_masters_logs",
+ 'metavar' : " ",
+ 'default' : "false",
+ 'help' : "'true' if the logs of the masters(jobtracker and
namenode if '--dynamicdfs' is set) have to be retained, 'false' if everything
has to be removed"}
)
-
+
def getDfsCommand(options, args):
if (options.config == None):
cmd = options.package + " " + "dfs " + args
@@ -109,12 +118,19 @@
import shutil
options = process_args()
- # if the cluster is used to bring up dynamic dfs, we must leave NameNode and
JobTracker logs,
- # otherwise only JobTracker logs. Likewise, in case of dynamic dfs, we must
also look for
- # deleting datanode logs
- filteredNames = ['jobtracker']
- deletedNamePrefixes = ['*-tasktracker-*']
- if options.dynamicdfs == 'true':
+
+ # if the retain-master-logs option is true, we do not delete
+ # the jobtracker, and in case of dynamic dfs, namenode logs.
+ # else, we delete the entire job directory, as nothing other
+ # than master and slave log files should be under the hod-logs
+ # directory.
+ filteredNames = [] # logs to skip while deleting
+ deletedNamePrefixes = [] # logs prefixes to delete.
+ if options.retain_masters_logs == 'true':
+ filteredNames = ['jobtracker']
+ deletedNamePrefixes = ['*-tasktracker-*']
+
+ if options.dynamicdfs == 'true' and options.retain_masters_logs == 'true':
filteredNames.append('namenode')
deletedNamePrefixes.append('*-datanode-*')
@@ -167,13 +183,21 @@
for job in toPurge.keys():
try:
- for prefix in deletedNamePrefixes:
- cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix)
+ if options.retain_masters_logs == 'false':
+ # delete entire job-id directory.
+ cmd = getDfsCommand(options, "-rmr " + toPurge[job])
print cmd
- ret = 0
ret = os.system(cmd)
if (ret != 0):
- print >> sys.stderr, "Command failed to delete file " + cmd
+ print >> sys.stderr, "Command failed to delete job directory " + cmd
+ else:
+ # delete only the prefixes we're interested in.
+ for prefix in deletedNamePrefixes:
+ cmd = getDfsCommand(options, "-rm " + toPurge[job] + '/' + prefix)
+ print cmd
+ ret = os.system(cmd)
+ if (ret != 0):
+ print >> sys.stderr, "Command failed to delete file " + cmd
except Exception, e:
print >> sys.stderr, e
Modified:
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml
URL:
http://svn.apache.org/viewvc/hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml?rev=735191&r1=735190&r2=735191&view=diff
==============================================================================
---
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml
(original)
+++
hadoop/core/trunk/src/docs/src/documentation/content/xdocs/hod_admin_guide.xml
Fri Jan 16 16:48:21 2009
@@ -312,6 +312,16 @@
logs - a scenario that may be common in test clusters.</td>
<td>false</td>
</tr>
+ <tr>
+ <td>-r</td>
+ <td>--retain-master-logs</td>
+ <td>If true, this will keep the JobTracker logs of a job in
hod-logs inside HDFS and it
+ will delete only the TaskTracker logs. Also, this will keep the
Namenode logs along with
JobTracker logs and will only delete the Datanode logs if
the 'dynamicdfs' option is set
+ to true. Otherwise, it will delete the complete job directory
from hod-logs inside
+ HDFS. By default it is set to false.</td>
+ <td>false</td>
+ </tr>
</table>
<p>So, for example, to delete all log files older than 7 days using a
hadoop-site.xml stored in
~/hadoop-conf, using the hadoop installation under ~/hadoop-0.17.0,
you could say:</p>