Author: rgardler
Date: Wed Nov 17 20:42:14 2010
New Revision: 1036216
URL: http://svn.apache.org/viewvc?rev=1036216&view=rev
Log:
allow limiting of stats to a particular list
Modified:
labs/agora/src/python/process.py
Modified: labs/agora/src/python/process.py
URL: http://svn.apache.org/viewvc/labs/agora/src/python/process.py?rev=1036216&r1=1036215&r2=1036216&view=diff
==============================================================================
--- labs/agora/src/python/process.py (original)
+++ labs/agora/src/python/process.py Wed Nov 17 20:42:14 2010
@@ -27,9 +27,12 @@ Where OPTIONS is one or more of:
-s
display stats from the history file. These stats describe processing
that has been carried out in the past.
+ -l LIST
+ only process the mailing list identified
-m PATH
set the path to the directory that contains the mailbox to process
- [default to './mail']
+ [default to './mail']. The directory should contain subdirectories
+ for each mailing list to be processed.
-d PATH
set the path to the directory that will contain the processed data
[defaults to './data']
@@ -47,6 +50,7 @@ import os, re, sys, time, getopt, pickle
program = sys.argv[0]
loud = True
force = False
+list = None
#-----------------------------------------------------------------------#
@@ -192,11 +196,12 @@ def process(read,write):
mbox_pattern = re.compile(r'^.*?(\d{4})-?(\d{2})(:?.gz)?',re.S)
def crawl(history,archives,input_path,output_path,file):
+ global list
input = os.path.join(input_path,file)
if os.path.isdir(input):
if loud:
- print "Process directory", input
+ print "Process directory", input
output = os.path.join(output_path,file)
if (not os.path.exists(output)):
os.mkdir(output)
@@ -246,10 +251,10 @@ def usage(code, msg=''):
def main():
""" Main program; parse options and go. """
- global loud, force
+ global loud, force, list
try:
- opts, args = getopt.getopt(sys.argv[1:], 'hsfqvd:m:p:')
+ opts, args = getopt.getopt(sys.argv[1:], 'hsfqvd:m:p:l:')
except getopt.error, msg:
usage(2, msg)
@@ -260,7 +265,11 @@ def main():
for opt, arg in opts:
if opt == '-h':
usage(0)
- if opt == '-s':
+ elif opt == "-l":
+ list = arg
+ if loud:
+ print "Limit processing to", list
+ elif opt == "-s":
display_stats()
sys.exit(0)
elif opt == "-q":
@@ -301,23 +310,36 @@ def main():
#-----------------------------------------------------------------------#
def display_stats():
+ global list
+
history = load_history()
for file in history:
- last_modified = history[file][0]
- stats = history[file][1]
- total = stats[0]
- valid = stats[1]
- invalid = stats[2]
- error = stats[3]
- missing_date = stats[4]
- missing_address = stats[5]
- missing_msgID = stats[6]
- missing_backlink = stats[7]
+ if list is not None:
+ if list in file:
+ last_modified = history[file][0]
+ stats = history[file][1]
+ total = int(stats[0])
+ valid = stats[1]
+ invalid = stats[2]
+ error = stats[3]
+ missing_date = stats[4]
+ missing_address = stats[5]
+ missing_msgID = stats[6]
+ missing_backlink = int(stats[7])
+ with_backlink = total - missing_backlink
+ in_conversation = round(float(with_backlink) / float (total) * 100, 1)
- print file
- print "Total emails", total
- print "Not in reply-to", missing_backlink
- print
+ start = file.index(list) + len(list) + 1
+ if file[-3:] == '.gz':
+ end = len(file) - 3
+ else:
+ end = len(file)
+
+ print file[start:end]
+ print "Total emails", total
+ print "Not in reply-to", missing_backlink
+ print "% in conversation", in_conversation
+ print
#-----------------------------------------------------------------------#
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]