http://www.mediawiki.org/wiki/Special:Code/MediaWiki/90977
Revision: 90977
Author: diederik
Date: 2011-06-28 16:33:42 +0000 (Tue, 28 Jun 2011)
Log Message:
-----------
Kaggle datafile generator final.
Modified Paths:
--------------
trunk/tools/editor_trends/kaggle/training_db.py
Modified: trunk/tools/editor_trends/kaggle/training_db.py
===================================================================
--- trunk/tools/editor_trends/kaggle/training_db.py 2011-06-28 16:23:30 UTC (rev 90976)
+++ trunk/tools/editor_trends/kaggle/training_db.py 2011-06-28 16:33:42 UTC (rev 90977)
@@ -253,32 +253,34 @@
namespaces = IDGenerator()
print 'Parsing revisions...'
db_raw = storage.init_database('mongo', 'wikilytics', 'enwiki_editors_raw')
-seen_editors = {}
-editors = {}
-x = 1
-for editor in post_editors:
+#seen_editors = {}
+#editors = {}
+#x = 1
+#for editor in post_editors:
+# #print editor
+# editors[x] = editor
+# x += 2
+#x = 0
+#z = len(post_editors)
+#for y, editor in enumerate(pre_editors):
+# #print editor
+# editors[x] = editor
+# x += 2
+# if z == y:
+# break
+#
+#editor_keys = editors.keys()
+#editor_keys.sort()
+#for key in editor_keys:
+# #print editors
+# #for editor in editors:
+# editor = editors[key]
#print editor
- editors[x] = editor
- x += 2
-x = 0
-z = len(post_editors)
-for y, editor in enumerate(pre_editors):
- #print editor
- editors[x] = editor
- x += 2
- if z == y:
- break
-
-editor_keys = editors.keys()
-editor_keys.sort()
-for key in editor_keys:
- #print editors
- #for editor in editors:
- editor = editors[key]
- #print editor
- go = editors_seen.get(editor, True)
- if go:
- editors_seen[editor] = False
+for editors in izip(pre_editors, post_editors):
+ for editor in editors:
+ #go = editors_seen.get(editor, True)
+ #if go:
+ # editors_seen[editor] = False
user_id = idg.get_id(editor)
print 'Parsing editor %s (%s) ...' % (editor, user_id)
revisions = db_raw.find({'user_id': str(editor)})
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs