http://www.mediawiki.org/wiki/Special:Code/MediaWiki/76348

Revision: 76348
Author:   diederik
Date:     2010-11-08 23:09:01 +0000 (Mon, 08 Nov 2010)
Log Message:
-----------
Added a function to store editors in MongoDB.

Modified Paths:
--------------
    trunk/tools/editor_trends/map_wiki_editors.py
    trunk/tools/editor_trends/utils/sort.py

Modified: trunk/tools/editor_trends/map_wiki_editors.py
===================================================================
--- trunk/tools/editor_trends/map_wiki_editors.py       2010-11-08 23:02:41 UTC 
(rev 76347)
+++ trunk/tools/editor_trends/map_wiki_editors.py       2010-11-08 23:09:01 UTC 
(rev 76348)
@@ -138,9 +138,8 @@
     
     Output is the data_queue that will be used by store_editors() 
     '''
-    input = os.path.join(settings.XML_FILE_LOCATION, kwargs.get('language', 
'en'), kwargs.get('project', 'wiki'))
-    output = os.path.join(input, 'txt')
-    utils.create_directory(output)
+    input = kwargs.get('input', None)
+    output = kwargs.get('output', None)
     debug = kwargs.get('debug', False)
     destination = kwargs.get('destination', 'file')
     
@@ -301,6 +300,9 @@
 
 def run_parse_editors(location, language, project):
     ids = load_bot_ids()
+    input = os.path.join(location, language, project)
+    output = os.path.join(input, 'txt')
+ 
     kwargs = {'bots': ids,
               'dbname': language + project,
               'language': language,
@@ -309,26 +311,32 @@
               'destination': 'file',
               'nr_input_processors': settings.NUMBER_OF_PROCESSES,
               'nr_output_processors': settings.NUMBER_OF_PROCESSES,
+              'input': input,
+              'output': output,
               }
     chunks = {}
     source = os.path.join(location, language, project)
     files = utils.retrieve_file_list(source, 'xml')
     parts = int(round(float(len(files)) / settings.NUMBER_OF_PROCESSES, 0))
     a = 0
+    
+    if not os.path.exists(input):
+        utils.create_directory(input)
+    if not os.path.exists(output):
+        utils.create_directory(output)
+        
     for x in xrange(settings.NUMBER_OF_PROCESSES):
         b = a + parts
         chunks[x] = files[a:b]
         a = (x + 1) * parts
 
     pc.build_scaffolding(pc.load_queue, parse_editors, chunks, False, False, 
**kwargs)
-    #search_cache_for_missed_editors(dbname)
 
 
 def debug_parse_editors(dbname):
     q = JoinableQueue()
     parse_editors('522.xml', q, None, None, debug=True, destination='file')
     store_editors(q, [], dbname)
-    #search_cache_for_missed_editors(dbname)
 
 
 if __name__ == "__main__":

Modified: trunk/tools/editor_trends/utils/sort.py
===================================================================
--- trunk/tools/editor_trends/utils/sort.py     2010-11-08 23:02:41 UTC (rev 
76347)
+++ trunk/tools/editor_trends/utils/sort.py     2010-11-08 23:09:01 UTC (rev 
76348)
@@ -28,6 +28,7 @@
 
 import settings
 import utils
+from database import cache
 
 def quick_sort(obs):
     if obs == []:
@@ -92,6 +93,23 @@
     fh.close()
 
 
+def store_editors(input, dbname):
+    fh = utils.create_txt_filehandle(input, 'merged.txt', 'r', 
settings.ENCODING)
+    mongo = db.init_mongo_db(dbname)
+    collection = mongo['editors']
+    mongo.collection.ensure_index('editor')
+    editor_cache = cache.EditorCache(collection)
+    prev_contributor = ''
+    for line in readline(file):
+        contributor = line[0]
+        if prev_contributor != contributor:
+             editor_cache.add('NEXT', '')
+        value = {'date': line[1], 'article': line[2]}
+        editor_cache.add(contributor, value)
+        prev_contributor = contributor
+    fh.close()
+
+
 def debug_merge_sorted_files(input, output):
     files = utils.retrieve_file_list(input, 'txt', mask='')
     filehandles = [utils.create_txt_filehandle(input, file, 'r', 
settings.ENCODING) for file in files]


_______________________________________________
MediaWiki-CVS mailing list
mediawiki-cvs@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to