https://www.mediawiki.org/wiki/Special:Code/MediaWiki/115152
Revision: 115152
Author: rfaulk
Date: 2012-05-07 14:22:45 +0000 (Mon, 07 May 2012)
Log Message:
-----------
Allow the page namespace to be specified in postings generation (in some
cases, searching outside of talk pages is necessary).
Modified Paths:
--------------
trunk/tools/wsor/message_templates/run_postings_and_metrics.py
trunk/tools/wsor/message_templates/umetrics/postings.py
Modified: trunk/tools/wsor/message_templates/run_postings_and_metrics.py
===================================================================
--- trunk/tools/wsor/message_templates/run_postings_and_metrics.py
2012-05-07 09:54:15 UTC (rev 115151)
+++ trunk/tools/wsor/message_templates/run_postings_and_metrics.py
2012-05-07 14:22:45 UTC (rev 115152)
@@ -46,15 +46,16 @@
131 : False, 132 : False, 133 : False, 134 : False,
135 : False, 136 : False, 137 : False, 138 : False, 139 : False, 140 : False,
141 : False, 142 : False, # ImageTaggingBot
117 : False, 118 : False, 119 : False, 120 : False,
121 : False, 122 : False, 123 : False, 124 : False, 125 : False, 126 : False,
127 : False, 128 : False, # CorenSearchBot
78 : False, 79 : False, 81 : False, 82 : False, #
TWINKLE
- 4 : True, 5 : True, # Welcome templates - chico
- 143 : False, 144 : False, 145 : False, 146 : False #
28 bot
+ 4 : False, 5 : False, # Welcome templates - chico
+ 143 : False, 144 : False, 145 : False, 146 : False, #
28 bot
+ 147 : True # Rcsprinter bot
}
# template_indices = {78 : True}
# Run postings and metrics
- generator = 'editcounts'
- postings = True
+ generator = 'warnings'
+ postings = False
# postings_cmd = './postings -h db1047 --start=%(start_time)s
--end=%(end_time)s --comment="%(rev_comment)s" --message="{{%(template)s}}"
--outfilename postings_%(file_name)s.tsv'
postings_cmd = './postings -h db42 --start=%(start_time)s
--end=%(end_time)s --message="{{%(template)s}}" --outfilename
postings_%(file_name)s.tsv'
@@ -69,7 +70,7 @@
template_name = 'z' + str(key)
logging.info('Generating postings for %s' % template_name)
- name, start_ts, end_ts, comment, user, api_uri, use_rev_file =
get_experiment(key)
+ name, start_ts, end_ts, comment, user, api_uri, use_rev_file,
namespace = get_experiment(key)
if key >= 60 and key <= 116:
filename_part = start_ts[4:8] + '_' + end_ts[4:8] + '_' +
template_name
@@ -92,6 +93,8 @@
cmd += ' -a %s' % api_uri
if use_rev_file != None:
cmd += ' --use_in_file %s' % use_rev_file
+ if namespace != None:
+ cmd += ' --namespace %s' % namespace
else:
cmd = metrics_cmd % {'file_name' : filename_part, 'generator'
: generator, 'fname_generator' : generator}
@@ -114,6 +117,7 @@
comment = None
api_uri = None
use_rev_file = None
+ namespace = None
if index >= 60 and index <= 77:
test_handle = 'Huggle_3'
@@ -179,9 +183,17 @@
user = '28bot'
comment = '.*'
+ elif index == 147:
+ test_handle = 'RcsprinterBot'
+ start_ts = '20120119000000'
+ end_ts = '20120501000000'
+ user = 'RcsprinterBot'
+ comment = '.*'
+ namespace = 0
+
logging.info('Processing %(test_handle)s from %(start_ts)s to %(end_ts)s
on comment "%(comment)s" for user "%(user)s" ...' % {'test_handle' :
test_handle, 'start_ts' : start_ts, 'end_ts' : end_ts, 'comment' : comment,
'user' : user})
- return test_handle, start_ts, end_ts, comment, user, api_uri, use_rev_file
+ return test_handle, start_ts, end_ts, comment, user, api_uri,
use_rev_file, namespace
"""
Call main, exit when execution is complete
Modified: trunk/tools/wsor/message_templates/umetrics/postings.py
===================================================================
--- trunk/tools/wsor/message_templates/umetrics/postings.py 2012-05-07
09:54:15 UTC (rev 115151)
+++ trunk/tools/wsor/message_templates/umetrics/postings.py 2012-05-07
14:22:45 UTC (rev 115152)
@@ -151,6 +151,12 @@
help='indicates that revisions should be read from a file. Name is to
be specified.',
default=''
)
+ parser.add_argument(
+ '--namespace',
+ type=str,
+ help='Page namespace on which to read revisions.',
+ default=3
+ )
args = parser.parse_args()
@@ -220,7 +226,7 @@
line = in_file.readline()
else:
- for rev in db.getPostings(args.start, args.end,
userName=args.user_name, commentRE=args.comment):
+ for rev in db.getPostings(args.start, args.end,
userName=args.user_name, commentRE=args.comment, namespace=args.namespace):
count += 1
revs.append(rev)
if count % 100 == 0: LOGGING_STREAM.write("|")
@@ -280,7 +286,7 @@
self.kwargs = kwargs
self.conn = MySQLdb.connect(*args, **kwargs)
- def getPostings(self, start, end, userName=None, commentRE=None):
+ def getPostings(self, start, end, userName=None, commentRE=None,
namespace=3):
cursor = self.conn.cursor(MySQLdb.cursors.SSDictCursor)
query = """
@@ -294,7 +300,7 @@
FROM revision r
INNER JOIN page p ON r.rev_page = p.page_id
WHERE rev_timestamp BETWEEN %(start)s AND %(end)s
- AND page_namespace = 3
+ AND page_namespace = %(page_namespace)s
"""
if userName != None:
@@ -302,14 +308,10 @@
if commentRE != None:
query += 'AND rev_comment REGEXP %(comment_pattern)s\n'
+ query = query % {'start': start, 'end': end, 'user_name': userName,
'comment_pattern': commentRE.pattern, 'page_namespace' : namespace}
+
cursor.execute(
- query,
- {
- 'start': start,
- 'end': end,
- 'user_name': userName,
- 'comment_pattern': commentRE.pattern
- }
+ query
)
return cursor
@@ -362,7 +364,12 @@
)
result = json.load(response)
- diff =
result['query']['pages'].values()[0]['revisions'][0]['*']
+ try:
+ diff =
result['query']['pages'].values()[0]['revisions'][0]['*']
+ except KeyError:
+ sys.stderr.write("x")
+ diff = ''
+ pass
# Add the diff tags such that the content is parsed as if
it were a diff
if type(diff) not in types.StringTypes: diff = ''
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs