http://www.mediawiki.org/wiki/Special:Code/MediaWiki/88348
Revision: 88348
Author: diederik
Date: 2011-05-17 20:49:10 +0000 (Tue, 17 May 2011)
Log Message:
-----------
Added index to registration date
Modified Paths:
--------------
trunk/tools/editor_trends/etl/adhoc/user_registration_date.py
Modified: trunk/tools/editor_trends/etl/adhoc/user_registration_date.py
===================================================================
--- trunk/tools/editor_trends/etl/adhoc/user_registration_date.py	2011-05-17 20:42:24 UTC (rev 88347)
+++ trunk/tools/editor_trends/etl/adhoc/user_registration_date.py	2011-05-17 20:49:10 UTC (rev 88348)
@@ -18,29 +18,35 @@
__version__ = '0.1'
import sys
+import os
from datetime import datetime
if '..' not in sys.path:
- sys.path.append('../../')
-
+ sys.path.append('..%s..%s' % (os.sep, os.sep))
+
from classes import storage
+from classes import settings
-location = '/Users/diederik/Desktop/d_20110502.tsv'
-fh = open(location, 'r')
+rts = settings.Settings()
db = storage.init_database('mongo', 'wikilytics', 'enwiki_editors_dataset')
+location = os.path.join(rts.csv_location, 'd_20110502.tsv')
+fh = open(location, 'r')
for i, line in enumerate(fh):
- if i ==0:
+ if i == 0:
continue
line = line.strip()
- line = line.replace("'",'')
+ line = line.replace("'", '')
line = line.split('\t')
- id =line[0]
- id = int(id[:-1])
- #date1=eval(line[1])
+ id = line[0]
+ id = id[:-1]
if line[1] == 'None':
continue
date = datetime.strptime(line[1][:8], '%Y%m%d')
- db.update('id', id, {'reg_date': date})
-
+ if i % 1000 == 0:
+ print 'Updated user %s' % i
+ db.update('editor', id, {'reg_date': date})
+fh.close()
-fh.close()
\ No newline at end of file
+print 'Adding index'
+db_dataset.add_index('reg_date')
+print 'Done.'
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs