http://www.mediawiki.org/wiki/Special:Code/MediaWiki/84701
Revision: 84701
Author: diederik
Date: 2011-03-24 20:11:09 +0000 (Thu, 24 Mar 2011)
Log Message:
-----------
Streamlined code a little bit, should be marginally faster.
Modified Paths:
--------------
trunk/tools/editor_trends/etl/enricher.py
Modified: trunk/tools/editor_trends/etl/enricher.py
===================================================================
--- trunk/tools/editor_trends/etl/enricher.py 2011-03-24 20:09:11 UTC (rev 84700)
+++ trunk/tools/editor_trends/etl/enricher.py 2011-03-24 20:11:09 UTC (rev 84701)
@@ -455,7 +455,7 @@
cassandra.install_schema(keyspace_name, drop_first=True)
-def launcher(function, path, dataset, storage):
+def launcher(function, path, dataset, storage, processors):
setup(storage)
input_queue = JoinableQueue()
#files = ['C:\\Users\\diederik.vanliere\\Downloads\\enwiki-latest-pages-articles1.xml.bz2']
@@ -472,7 +472,7 @@
input_queue.put(None)
extracters = [Process(target=stream_raw_xml, args=[input_queue, storage,
id, function, dataset])
- for id in xrange(cpu_count())]
+ for id in xrange(processors)]
for extracter in extracters:
extracter.start()
@@ -485,7 +485,8 @@
function = create_variables
storage = 'csv'
dataset = 'training'
- launcher(function1, path1, dataset1, storage)
+ processors = cpu_count()
+ launcher(function, path, dataset, storage, processors)
def launcher_prediction():
@@ -494,7 +495,8 @@
function = count_edits
storage = 'csv'
dataset = 'prediction'
- launcher(function2, path2, dataset2, storage)
+ processors = 1
+ launcher(function, path, dataset, storage, processors)
if __name__ == '__main__':
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs