http://www.mediawiki.org/wiki/Special:Code/MediaWiki/89427
Revision: 89427
Author: halfak
Date: 2011-06-03 17:10:08 +0000 (Fri, 03 Jun 2011)
Log Message:
-----------
added catgory editors script (not tested)
Added Paths:
-----------
trunk/tools/wsor/category_editors/get_category_editors.py
Added: trunk/tools/wsor/category_editors/get_category_editors.py
===================================================================
--- trunk/tools/wsor/category_editors/get_category_editors.py
(rev 0)
+++ trunk/tools/wsor/category_editors/get_category_editors.py 2011-06-03
17:10:08 UTC (rev 89427)
@@ -0,0 +1,128 @@
+import pymongo, logging, time, argparse, sys
+from collections import deque
+
+
+
def main(args):
    """Print one TSV row per editor-month with enough edits in a category.

    Connects to MongoDB with ``pymongo.Connection()`` (default arguments)
    and uses its ``wikilytics`` database.  The ids of every document in
    ``enwiki_articles_dataset`` whose ``category`` equals ``args.category``
    are collected; then, for each document in ``enwiki_editors_raw``, the
    editor's edits are split into months by ``get_months_of_edits`` and a
    row is printed to stdout whenever at least ``args.n`` of a month's
    edits touch one of those articles.  Log output goes to stderr so it
    does not mix with the data on stdout.

    Args:
        args: parsed command-line namespace.  Reads ``args.category`` (the
            category to match) and ``args.n`` (minimum number of matching
            edits in one month for the row to be printed).

    Editor documents are expected to carry ``'username'``, ``'editor'`` and
    ``'edits'`` keys, and each edit an ``'article'`` key.
    """
    logging.basicConfig(
        level=logging.DEBUG,
        stream=sys.stderr,
        format='%(asctime)s %(levelname)-8s %(message)s',
        datefmt='%b-%d %H:%M:%S'
    )

    logging.info("Connecting to mongo.")
    db = pymongo.Connection().wikilytics

    logging.info("Getting arbitration article ids.")
    cat_ids = set(
        article['id']
        for article in
        db.enwiki_articles_dataset.find({'category': args.category})
    )
    logging.info(
        "Found %s articles with in '%s' category.",
        len(cat_ids),
        args.category
    )

    # Header labels follow the order in which the row values are emitted
    # below (year first, then month), so each column is labelled correctly.
    print("\t".join(['username', 'user_id', 'year', 'month', 'edits']))

    # Counting is per calendar month only; no sliding time window is applied.
    for editor in db.enwiki_editors_raw.find():
        for year, month, edits in get_months_of_edits(editor['edits']):
            cat_edits = [e for e in edits if e['article'] in cat_ids]
            if len(cat_edits) < args.n:
                continue

            row = [
                editor['username'],
                editor['editor'],
                year,
                month,
                len(cat_edits)
            ]
            print("\t".join(clean(v) for v in row))
+
+
+
+
+
def clean(value):
    """Encode *value* as a single field of a tab-separated output row.

    ``None`` is written as the two-character marker made of a backslash
    followed by a capital N.  Any other value is converted with ``str()``
    and has its backslashes, tabs and newlines replaced by the escaped
    forms (double backslash, backslash-t, backslash-n), so a field can
    never contain a raw column or row separator.

    Args:
        value: the value to encode; anything accepted by ``str()``, or None.

    Returns:
        The encoded field as a string.
    """
    if value is None:
        # Written with an escaped backslash: a bare backslash-N inside a
        # normal string literal is a malformed escape on Python 3.
        return "\\N"

    text = str(value)
    # Backslashes must be doubled first, otherwise the backslashes added by
    # the tab/newline escapes below would themselves be doubled.
    text = text.replace("\\", "\\\\")
    text = text.replace("\t", "\\t")
    text = text.replace("\n", "\\n")
    return text
+
def get_months_of_edits(edits):
    """Yield an editor's edits grouped into runs of the same calendar month.

    Args:
        edits: mapping of year -> sequence of edit dicts.  Each edit's
            ``'date'`` value must provide ``strftime`` (presumably a
            ``datetime`` -- confirm against the stored documents).

    Yields:
        ``(year, month, month_edits)`` tuples, where ``year`` is the mapping
        key unchanged, ``month`` is the zero-padded ``"%m"`` string of the
        edits in the group, and ``month_edits`` is the list of those edits
        in their original order.

    Grouping is by consecutive edits sharing a month, so each year's
    sequence is assumed to be in date order; if it is not, the same month
    is yielded once per run.  Years without any edits yield nothing.
    """
    for year, year_edits in edits.items():
        curr_month = None
        month_edits = []
        for edit in year_edits:
            month = edit['date'].strftime("%m")
            if month_edits and month != curr_month:
                # Month boundary: emit the finished group under the label of
                # the month it was collected for, then start a new group.
                yield (year, curr_month, month_edits)
                month_edits = []

            curr_month = month
            month_edits.append(edit)

        # Flush the last group of the year; skipped when the year is empty.
        if month_edits:
            yield (year, curr_month, month_edits)
+
+
def capitalize(word):
    """Return *word* with its first character capitalized.

    Unlike ``str.capitalize()`` applied to the whole string, the remaining
    characters are left exactly as given instead of being lower-cased.
    An empty string is returned unchanged.

    Args:
        word: the string to capitalize.

    Returns:
        The string with only its first character capitalized.
    """
    # Slicing (rather than indexing) makes the empty string fall through
    # naturally: ''[:1] is '' and ''.capitalize() is ''.
    return word[:1].capitalize() + word[1:]
+
+
+
# NOTE(review): the LimQueue class below is disabled -- it is wrapped in a
# module-level string literal, so the name LimQueue is never defined and any
# call to LimQueue(...) raises NameError.  Points to settle before it is
# re-enabled:
#   * append() only pops items off the front of the list and returns them;
#     it never adds `item` to the list.
#   * the `while not self.limit(...)` loop has no emptiness check, so a
#     predicate that stays false ends in IndexError from pop(0) on an
#     empty list.
#   * `iterable=[]` is a mutable default argument (here it is only read,
#     by list.__init__).
#   * the predicate is called as limit(self, item), i.e. (list, item).
+"""class LimQueue(list):
+
+ def __init__(self, iterable=[], limit=lambda l, item: True):
+ list.__init__(self, iterable)
+ self.limit = limit
+
+ def append(self, item):
+ expectoration = []
+ while not self.limit(self, item):
+ expectoration.append(self.pop(0))
+
+ return expectoration
+"""
+
if __name__ == "__main__":
    # Script entry point: declare the two positional arguments, parse the
    # command line and hand the resulting namespace to main().
    cli = argparse.ArgumentParser(
        description=(
            'Finds editors that made at least some number of '
            'edits to a category of articles in a month. '
            'This script prints one row for each editor-month '
            'with enough edits to a category of articles.'
        )
    )

    # Minimum number of in-category edits for an editor-month to be listed.
    cli.add_argument(
        'n',
        type=int,
        help=(
            'the threshold number of edits per time period in a '
            'category for inclusion'
        )
    )

    # The category name is passed through capitalize() before use.
    cli.add_argument(
        'category',
        type=capitalize,
        help='the category in which to search for edits'
    )

    args = cli.parse_args()
    main(args)
+
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs