http://www.mediawiki.org/wiki/Special:Code/MediaWiki/88348

Revision: 88348
Author:   diederik
Date:     2011-05-17 20:49:10 +0000 (Tue, 17 May 2011)
Log Message:
-----------
Added index to registration date

Modified Paths:
--------------
    trunk/tools/editor_trends/etl/adhoc/user_registration_date.py

Modified: trunk/tools/editor_trends/etl/adhoc/user_registration_date.py
===================================================================
--- trunk/tools/editor_trends/etl/adhoc/user_registration_date.py       2011-05-17 20:42:24 UTC (rev 88347)
+++ trunk/tools/editor_trends/etl/adhoc/user_registration_date.py       2011-05-17 20:49:10 UTC (rev 88348)
@@ -18,29 +18,35 @@
 __version__ = '0.1'
 
 import sys
+import os
 from datetime import datetime
 if '..' not in sys.path:
-    sys.path.append('../../')
-    
+    sys.path.append('..%s..%s' % (os.sep, os.sep))
+
 from classes import storage
+from classes import settings
 
-location  = '/Users/diederik/Desktop/d_20110502.tsv'
-fh = open(location, 'r')
+rts = settings.Settings()
 db = storage.init_database('mongo', 'wikilytics', 'enwiki_editors_dataset')
+location = os.path.join(rts.csv_location, 'd_20110502.tsv')
 
+fh = open(location, 'r')
 for i, line in enumerate(fh):
-    if i ==0:
+    if i == 0:
         continue
     line = line.strip()
-    line = line.replace("'",'')
+    line = line.replace("'", '')
     line = line.split('\t')
-    id =line[0]
-    id = int(id[:-1])
-    #date1=eval(line[1])
+    id = line[0]
+    id = id[:-1]
     if line[1] == 'None':
         continue
     date = datetime.strptime(line[1][:8], '%Y%m%d')
-    db.update('id', id, {'reg_date': date})
-    
+    if i % 1000 == 0:
+        print 'Updated user %s' % i
+    db.update('editor', id, {'reg_date': date})
+fh.close()
 
-fh.close()
\ No newline at end of file
+print 'Adding index'
+db_dataset.add_index('reg_date')
+print 'Done.'


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to