http://www.mediawiki.org/wiki/Special:Code/MediaWiki/76348
Revision: 76348
Author: diederik
Date: 2010-11-08 23:09:01 +0000 (Mon, 08 Nov 2010)
Log Message:
-----------
Added store in MongoDB function.
Modified Paths:
--------------
trunk/tools/editor_trends/map_wiki_editors.py
trunk/tools/editor_trends/utils/sort.py
Modified: trunk/tools/editor_trends/map_wiki_editors.py
===================================================================
--- trunk/tools/editor_trends/map_wiki_editors.py 2010-11-08 23:02:41 UTC (rev 76347)
+++ trunk/tools/editor_trends/map_wiki_editors.py 2010-11-08 23:09:01 UTC (rev 76348)
@@ -138,9 +138,8 @@
Output is the data_queue that will be used by store_editors()
'''
- input = os.path.join(settings.XML_FILE_LOCATION, kwargs.get('language', 'en'), kwargs.get('project', 'wiki'))
- output = os.path.join(input, 'txt')
- utils.create_directory(output)
+ input = kwargs.get('input', None)
+ output = kwargs.get('output', None)
debug = kwargs.get('debug', False)
destination = kwargs.get('destination', 'file')
@@ -301,6 +300,9 @@
def run_parse_editors(location, language, project):
ids = load_bot_ids()
+ input = os.path.join(location, language, project)
+ output = os.path.join(input, 'txt')
+
kwargs = {'bots': ids,
'dbname': language + project,
'language': language,
@@ -309,26 +311,32 @@
'destination': 'file',
'nr_input_processors': settings.NUMBER_OF_PROCESSES,
'nr_output_processors': settings.NUMBER_OF_PROCESSES,
+ 'input': input,
+ 'output': output,
}
chunks = {}
source = os.path.join(location, language, project)
files = utils.retrieve_file_list(source, 'xml')
parts = int(round(float(len(files)) / settings.NUMBER_OF_PROCESSES, 0))
a = 0
+
+ if not os.path.exists(input):
+ utils.create_directory(input)
+ if not os.path.exists(output):
+ utils.create_directory(output)
+
for x in xrange(settings.NUMBER_OF_PROCESSES):
b = a + parts
chunks[x] = files[a:b]
a = (x + 1) * parts
pc.build_scaffolding(pc.load_queue, parse_editors, chunks, False, False,
**kwargs)
- #search_cache_for_missed_editors(dbname)
def debug_parse_editors(dbname):
q = JoinableQueue()
parse_editors('522.xml', q, None, None, debug=True, destination='file')
store_editors(q, [], dbname)
- #search_cache_for_missed_editors(dbname)
if __name__ == "__main__":
Modified: trunk/tools/editor_trends/utils/sort.py
===================================================================
--- trunk/tools/editor_trends/utils/sort.py 2010-11-08 23:02:41 UTC (rev 76347)
+++ trunk/tools/editor_trends/utils/sort.py 2010-11-08 23:09:01 UTC (rev 76348)
@@ -28,6 +28,7 @@
import settings
import utils
+from database import cache
def quick_sort(obs):
if obs == []:
@@ -92,6 +93,23 @@
fh.close()
+def store_editors(input, dbname):
+ fh = utils.create_txt_filehandle(input, 'merged.txt', 'r', settings.ENCODING)
+ mongo = db.init_mongo_db(dbname)
+ collection = mongo['editors']
+ mongo.collection.ensure_index('editor')
+ editor_cache = cache.EditorCache(collection)
+ prev_contributor = ''
+ for line in readline(file):
+ contributor = line[0]
+ if prev_contributor != contributor:
+ editor_cache.add('NEXT', '')
+ value = {'date': line[1], 'article': line[2]}
+ editor_cache.add(contributor, value)
+ prev_contributor = contributor
+ fh.close()
+
+
def debug_merge_sorted_files(input, output):
files = utils.retrieve_file_list(input, 'txt', mask='')
filehandles = [utils.create_txt_filehandle(input, file, 'r', settings.ENCODING) for file in files]
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs