http://www.mediawiki.org/wiki/Special:Code/MediaWiki/84701

Revision: 84701
Author:   diederik
Date:     2011-03-24 20:11:09 +0000 (Thu, 24 Mar 2011)
Log Message:
-----------
Streamlined code a little bit, should be marginally faster. 

Modified Paths:
--------------
    trunk/tools/editor_trends/etl/enricher.py

Modified: trunk/tools/editor_trends/etl/enricher.py
===================================================================
--- trunk/tools/editor_trends/etl/enricher.py   2011-03-24 20:09:11 UTC (rev 84700)
+++ trunk/tools/editor_trends/etl/enricher.py   2011-03-24 20:11:09 UTC (rev 84701)
@@ -455,7 +455,7 @@
         cassandra.install_schema(keyspace_name, drop_first=True)
 
 
-def launcher(function, path, dataset, storage):
+def launcher(function, path, dataset, storage, processors):
     setup(storage)
     input_queue = JoinableQueue()
     #files = ['C:\\Users\\diederik.vanliere\\Downloads\\enwiki-latest-pages-articles1.xml.bz2']
@@ -472,7 +472,7 @@
         input_queue.put(None)
 
     extracters = [Process(target=stream_raw_xml, args=[input_queue, storage, id, function, dataset])
-                  for id in xrange(cpu_count())]
+                  for id in xrange(processors)]
     for extracter in extracters:
         extracter.start()
 
@@ -485,7 +485,8 @@
     function = create_variables
     storage = 'csv'
     dataset = 'training'
-    launcher(function1, path1, dataset1, storage)
+    processors = cpu_count()
+    launcher(function, path, dataset, storage, processors)
 
 
 def launcher_prediction():
@@ -494,7 +495,8 @@
     function = count_edits
     storage = 'csv'
     dataset = 'prediction'
-    launcher(function2, path2, dataset2, storage)
+    processors = 1
+    launcher(function, path, dataset, storage, processors)
 
 
 if __name__ == '__main__':


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to