http://www.mediawiki.org/wiki/Special:Code/MediaWiki/82764
Revision: 82764
Author: diederik
Date: 2011-02-24 23:37:03 +0000 (Thu, 24 Feb 2011)
Log Message:
-----------
Fixed a locking situation.
Modified Paths:
--------------
trunk/tools/editor_trends/analyses/analyzer.py
trunk/tools/editor_trends/classes/dataset.py
Modified: trunk/tools/editor_trends/analyses/analyzer.py
===================================================================
--- trunk/tools/editor_trends/analyses/analyzer.py 2011-02-24 23:36:43 UTC
(rev 82763)
+++ trunk/tools/editor_trends/analyses/analyzer.py 2011-02-24 23:37:03 UTC
(rev 82764)
@@ -38,7 +38,31 @@
from utils import timer
from utils import log
class Replicator:
    '''
    Callable that runs a single plugin once for every combination of the
    supplied cutoff and cum_cutoff values.

    rts, plugin, time_unit and any extra keyword arguments are handed
    unchanged to generate_chart_data on every run.

    cutoff is an iterable of cutoff values and defaults to [1, 10, 50].
    cum_cutoff is an iterable of cumulative cutoff values and defaults to
    [10].
    '''
    def __init__(self, rts, plugin, time_unit, cutoff=None, cum_cutoff=None,
                 **kwargs):
        self.plugin = plugin
        self.rts = rts
        self.time_unit = time_unit
        self.cutoff = [1, 10, 50] if cutoff is None else cutoff
        # The default for cum_cutoff has to be decided by cum_cutoff itself.
        # Testing cutoff here would discard a caller supplied cum_cutoff
        # whenever cutoff is omitted, and would leave cum_cutoff set to None
        # (which cannot be iterated in __call__) whenever only cutoff is
        # given.
        self.cum_cutoff = [10] if cum_cutoff is None else cum_cutoff
        self.kwargs = kwargs

    def __call__(self):
        '''
        Call generate_chart_data for each (cum_cutoff, cutoff) pair, with
        cum_cutoff as the outer loop and cutoff as the inner loop.
        '''
        for cum_cutoff in self.cum_cutoff:
            for cutoff in self.cutoff:
                generate_chart_data(self.rts, self.plugin,
                                    time_unit=self.time_unit,
                                    cutoff=cutoff, cum_cutoff=cum_cutoff,
                                    **self.kwargs)
+
+
class Analyzer(consumers.BaseConsumer):
def __init__(self, rts, tasks, result, var):
super(Analyzer, self).__init__(rts, tasks, result)
@@ -109,9 +133,9 @@
ds.filename)
ds.write(format='csv')
print 'Serializing dataset to %s_%s' % (rts.dbname, 'charts')
- log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start')
- ds.write(format='mongo')
- log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish')
+ #log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start')
+ #ds.write(format='mongo')
+ #log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish')
def generate_chart_data(rts, func, **kwargs):
@@ -121,6 +145,8 @@
'''
stopwatch = timer.Timer()
plugin = retrieve_plugin(func)
+ if not plugin:
+ raise 'Plugin function %s is unknown, please make sure that you
specify an existing plugin function.' % func
feedback(plugin, rts)
obs = dict()
@@ -177,11 +203,11 @@
tasks.join()
reconstruct_observations(var)
- ds = dataset.Dataset(plugin.func_name, rts, format=fmt)
+ ds = dataset.Dataset(plugin.func_name, rts, format=fmt, **kwargs)
ds.add_variable(var)
stopwatch.elapsed()
- #write_output(ds, rts, stopwatch)
+ write_output(ds, rts, stopwatch)
ds.summary()
#return True
@@ -202,7 +228,7 @@
return min_year, max_year
-if __name__ == '__main__':
+def launcher():
project, language, parser = manager.init_args_parser()
args = parser.parse_args(['django'])
rts = runtime_settings.init_environment('wiki', 'en', args)
@@ -212,15 +238,25 @@
rts.editors_dataset = 'editors_dataset'
#END TEMP FIX
- generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year',
cutoff=1, cum_cutoff=10)
+# replicator = Replicator(rts, 'histogram_by_backward_cohort',
time_unit='year')
+# replicator()
+# replicator = Replicator(rts, 'cohort_dataset_backward_bar',
time_unit='year', format='wide')
+# replicator()
+
+# generate_chart_data(rts, 'histogram_by_backward_cohort',
time_unit='year', cutoff=1, cum_cutoff=10)
# generate_chart_data(rts, 'edit_patterns', time_unit='year', cutoff=5)
# generate_chart_data(rts, 'total_number_of_new_wikipedians',
time_unit='year')
# generate_chart_data(rts, 'total_number_of_articles', time_unit='year')
# generate_chart_data(rts, 'total_cumulative_edits', time_unit='year')
-# generate_chart_data(rts, 'cohort_dataset_forward_histogram',
time_unit='month', cutoff=1, cum_cutoff=10)
- generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year',
cutoff=1, cum_cutoff=10, format='wide')
+ generate_chart_data(rts, 'cohort_dataset_forward_histogram',
time_unit='month', cutoff=1, cum_cutoff=10)
+# generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year',
cutoff=1, cum_cutoff=10, format='wide')
# generate_chart_data(rts, 'cohort_dataset_forward_bar', time_unit='year',
cutoff=5, cum_cutoff=0, format='wide')
# generate_chart_data(rts, 'histogram_edits', time_unit='year', cutoff=0)
# generate_chart_data(rts, 'time_to_new_wikipedian', time_unit='year',
cutoff=0)
# generate_chart_data(rts, 'new_editor_count', time_unit='month', cutoff=0)
# #available_analyses()
+
+
+
+if __name__ == '__main__':
+ launcher()
Modified: trunk/tools/editor_trends/classes/dataset.py
===================================================================
--- trunk/tools/editor_trends/classes/dataset.py 2011-02-24 23:36:43 UTC
(rev 82763)
+++ trunk/tools/editor_trends/classes/dataset.py 2011-02-24 23:37:03 UTC
(rev 82764)
@@ -26,7 +26,7 @@
import cPickle
import hashlib
from pymongo.son_manipulator import SONManipulator
-from multiprocessing import Manager
+from multiprocessing import Manager, RLock
from texttable import Texttable
from datetime import timedelta
@@ -91,13 +91,12 @@
This is a generic hash function that expects a list of variables, used
to lookup an Observation or Variable.
'''
- id = '_'.join([str(var) for var in vars])
+ return hash('_'.join([str(var) for var in vars]))
#return id
- m = hashlib.md5()
- m.update(id)
+ #m = hashlib.md5()
+ #m.update(id)
#print id, m.hexdigest()
- return m.hexdigest()
- #return ''.join([str(var) for var in vars])
+ #return m.hexdigest()
def encode_to_bson(self, data=None):
'''
@@ -209,20 +208,8 @@
else:
self.data += value
self.count += 1
-# self.lock.acquire()
-# try:
-# if isinstance(value, list):
-# if self.count == 0:
-# self.data = []
-# self.data.append(value)
-# else:
-# self.data += value
-# finally:
-# self.count += 1
-# self.lock.release()
-
def get_date_range(self):
return '%s-%s-%s:%s-%s-%s' % (self.t0.month, self.t0.day,
self.t0.year, \
self.t1.month, self.t1.day, self.t1.year)
@@ -361,9 +348,9 @@
'''
def __init__(self, chart, rts, vars=None, **kwargs):
- self.encoder, chart, charts = json_encoders.get_json_encoder(chart)
+ self.encoder, chart_type, charts =
json_encoders.get_json_encoder(chart)
if self.encoder == None:
- raise exceptions.UnknownChartError(chart, charts)
+ raise exceptions.UnknownChartError(chart_type, charts)
self.chart = chart
self.name = 'Dataset to construct %s' % self.chart
self.project = rts.project.name
@@ -427,7 +414,7 @@
attrs = '_'.join(['%s=%s' % (k, getattr(var, k)) for k in keys])
filename = '%s%s_%s_%s.csv' % (self.language_code,
self.project,
- self.name,
+ self.chart,
attrs)
self.filename = filename
@@ -467,9 +454,15 @@
def to_csv(self):
data = data_converter.convert_dataset_to_lists(self, 'manage')
headers = data_converter.add_headers(self)
- fh = file_utils.create_txt_filehandle(settings.dataset_location,
self.filename, 'w', settings.encoding)
+ lock = RLock()
+ fh = file_utils.create_txt_filehandle(settings.dataset_location,
+ self.filename,
+ 'w',
+ settings.encoding)
file_utils.write_list_to_csv(headers, fh, recursive=False,
newline=True)
- file_utils.write_list_to_csv(data, fh, recursive=False, newline=True,
format=self.format)
+ file_utils.write_list_to_csv(data, fh, recursive=False, newline=True,
+ format=self.format,
+ lock=lock)
fh.close()
def encode(self):
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs