http://www.mediawiki.org/wiki/Special:Code/MediaWiki/82764

Revision: 82764
Author:   diederik
Date:     2011-02-24 23:37:03 +0000 (Thu, 24 Feb 2011)
Log Message:
-----------
Fixed a locking situation.

Modified Paths:
--------------
    trunk/tools/editor_trends/analyses/analyzer.py
    trunk/tools/editor_trends/classes/dataset.py

Modified: trunk/tools/editor_trends/analyses/analyzer.py
===================================================================
--- trunk/tools/editor_trends/analyses/analyzer.py      2011-02-24 23:36:43 UTC (rev 82763)
+++ trunk/tools/editor_trends/analyses/analyzer.py      2011-02-24 23:37:03 UTC (rev 82764)
@@ -38,7 +38,31 @@
 from utils import timer
 from utils import log
 
+class Replicator:
+    def __init__(self, rts, plugin, time_unit, cutoff=None, cum_cutoff=None, **kwargs):
+        self.plugin = plugin
+        self.rts = rts
+        self.time_unit = time_unit
+        if cutoff == None:
+            self.cutoff = [1, 10, 50]
+        else:
+            self.cutoff = cutoff
 
+        if cutoff == None:
+            self.cum_cutoff = [10]
+        else:
+            self.cum_cutoff = cum_cutoff
+        self.kwargs = kwargs
+
+    def __call__(self):
+        for cum_cutoff in self.cum_cutoff:
+            for cutoff in self.cutoff:
+                generate_chart_data(self.rts, self.plugin,
+                                    time_unit=self.time_unit,
+                                    cutoff=cutoff, cum_cutoff=cum_cutoff,
+                                    **self.kwargs)
+
+
 class Analyzer(consumers.BaseConsumer):
     def __init__(self, rts, tasks, result, var):
         super(Analyzer, self).__init__(rts, tasks, result)
@@ -109,9 +133,9 @@
                                                ds.filename)
     ds.write(format='csv')
     print 'Serializing dataset to %s_%s' % (rts.dbname, 'charts')
-    log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start')
-    ds.write(format='mongo')
-    log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish')
+    #log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='start')
+    #ds.write(format='mongo')
+    #log.log_to_mongo(rts, 'chart', 'storing', stopwatch, event='finish')
 
 
 def generate_chart_data(rts, func, **kwargs):
@@ -121,6 +145,8 @@
     '''
     stopwatch = timer.Timer()
     plugin = retrieve_plugin(func)
+    if not plugin:
+        raise 'Plugin function %s is unknown, please make sure that you specify an existing plugin function.' % func
     feedback(plugin, rts)
 
     obs = dict()
@@ -177,11 +203,11 @@
     tasks.join()
 
     reconstruct_observations(var)
-    ds = dataset.Dataset(plugin.func_name, rts, format=fmt)
+    ds = dataset.Dataset(plugin.func_name, rts, format=fmt, **kwargs)
     ds.add_variable(var)
 
     stopwatch.elapsed()
-    #write_output(ds, rts, stopwatch)
+    write_output(ds, rts, stopwatch)
 
     ds.summary()
     #return True
@@ -202,7 +228,7 @@
     return min_year, max_year
 
 
-if __name__ == '__main__':
+def launcher():
     project, language, parser = manager.init_args_parser()
     args = parser.parse_args(['django'])
     rts = runtime_settings.init_environment('wiki', 'en', args)
@@ -212,15 +238,25 @@
     rts.editors_dataset = 'editors_dataset'
     #END TEMP FIX
 
-    generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year', cutoff=1, cum_cutoff=10)
+#    replicator = Replicator(rts, 'histogram_by_backward_cohort', time_unit='year')
+#    replicator()
+#    replicator = Replicator(rts, 'cohort_dataset_backward_bar', time_unit='year', format='wide')
+#    replicator()
+
+#    generate_chart_data(rts, 'histogram_by_backward_cohort', time_unit='year', cutoff=1, cum_cutoff=10)
 #    generate_chart_data(rts, 'edit_patterns', time_unit='year', cutoff=5)
 #    generate_chart_data(rts, 'total_number_of_new_wikipedians', time_unit='year')
 #    generate_chart_data(rts, 'total_number_of_articles', time_unit='year')
 #    generate_chart_data(rts, 'total_cumulative_edits', time_unit='year')
-#    generate_chart_data(rts, 'cohort_dataset_forward_histogram', time_unit='month', cutoff=1, cum_cutoff=10)
-    generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year', cutoff=1, cum_cutoff=10, format='wide')
+    generate_chart_data(rts, 'cohort_dataset_forward_histogram', time_unit='month', cutoff=1, cum_cutoff=10)
+#    generate_chart_data(rts, 'cohort_dataset_backward_bar', time_unit='year', cutoff=1, cum_cutoff=10, format='wide')
 #    generate_chart_data(rts, 'cohort_dataset_forward_bar', time_unit='year', cutoff=5, cum_cutoff=0, format='wide')
 #    generate_chart_data(rts, 'histogram_edits', time_unit='year', cutoff=0)
 #    generate_chart_data(rts, 'time_to_new_wikipedian', time_unit='year', cutoff=0)
 #    generate_chart_data(rts, 'new_editor_count', time_unit='month', cutoff=0)
 #    #available_analyses()
+
+
+
+if __name__ == '__main__':
+    launcher()

Modified: trunk/tools/editor_trends/classes/dataset.py
===================================================================
--- trunk/tools/editor_trends/classes/dataset.py        2011-02-24 23:36:43 UTC (rev 82763)
+++ trunk/tools/editor_trends/classes/dataset.py        2011-02-24 23:37:03 UTC (rev 82764)
@@ -26,7 +26,7 @@
 import cPickle
 import hashlib
 from pymongo.son_manipulator import SONManipulator
-from multiprocessing import Manager
+from multiprocessing import Manager, RLock
 from texttable import Texttable
 from datetime import timedelta
 
@@ -91,13 +91,12 @@
         This is a generic hash function that expects a list of variables, used
         to lookup an Observation or Variable. 
         '''
-        id = '_'.join([str(var) for var in vars])
+        return hash('_'.join([str(var) for var in vars]))
         #return id
-        m = hashlib.md5()
-        m.update(id)
+        #m = hashlib.md5()
+        #m.update(id)
         #print id, m.hexdigest()
-        return m.hexdigest()
-        #return ''.join([str(var) for var in vars])
+        #return m.hexdigest()
 
     def encode_to_bson(self, data=None):
         '''
@@ -209,20 +208,8 @@
         else:
             self.data += value
         self.count += 1
-#        self.lock.acquire()
-#        try:
-#            if isinstance(value, list):
-#                if self.count == 0:
-#                    self.data = []
-#                self.data.append(value)
-#            else:
-#                self.data += value
-#        finally:
-#            self.count += 1
-#            self.lock.release()
 
 
-
     def get_date_range(self):
         return '%s-%s-%s:%s-%s-%s' % (self.t0.month, self.t0.day, self.t0.year, \
                                       self.t1.month, self.t1.day, self.t1.year)
@@ -361,9 +348,9 @@
     '''
 
     def __init__(self, chart, rts, vars=None, **kwargs):
-        self.encoder, chart, charts = json_encoders.get_json_encoder(chart)
+        self.encoder, chart_type, charts = json_encoders.get_json_encoder(chart)
         if self.encoder == None:
-            raise exceptions.UnknownChartError(chart, charts)
+            raise exceptions.UnknownChartError(chart_type, charts)
         self.chart = chart
         self.name = 'Dataset to construct %s' % self.chart
         self.project = rts.project.name
@@ -427,7 +414,7 @@
         attrs = '_'.join(['%s=%s' % (k, getattr(var, k)) for k in keys])
         filename = '%s%s_%s_%s.csv' % (self.language_code,
                                        self.project,
-                                       self.name,
+                                       self.chart,
                                        attrs)
         self.filename = filename
 
@@ -467,9 +454,15 @@
     def to_csv(self):
         data = data_converter.convert_dataset_to_lists(self, 'manage')
         headers = data_converter.add_headers(self)
-        fh = file_utils.create_txt_filehandle(settings.dataset_location, self.filename, 'w', settings.encoding)
+        lock = RLock()
+        fh = file_utils.create_txt_filehandle(settings.dataset_location,
+                                              self.filename,
+                                              'w',
+                                              settings.encoding)
         file_utils.write_list_to_csv(headers, fh, recursive=False, newline=True)
-        file_utils.write_list_to_csv(data, fh, recursive=False, newline=True, format=self.format)
+        file_utils.write_list_to_csv(data, fh, recursive=False, newline=True,
+                                     format=self.format,
+                                     lock=lock)
         fh.close()
 
     def encode(self):


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to