http://www.mediawiki.org/wiki/Special:Code/MediaWiki/89750

Revision: 89750
Author:   diederik
Date:     2011-06-08 20:51:14 +0000 (Wed, 08 Jun 2011)
Log Message:
-----------
Added support for detecting speedy deletions in the XML dump.

Modified Paths:
--------------
    trunk/tools/editor_trends/etl/variables.py

Modified: trunk/tools/editor_trends/etl/variables.py
===================================================================
--- trunk/tools/editor_trends/etl/variables.py  2011-06-08 20:45:44 UTC (rev 89749)
+++ trunk/tools/editor_trends/etl/variables.py  2011-06-08 20:51:14 UTC (rev 89750)
@@ -19,8 +19,11 @@
 __version__ = '0.1'
 
 import hashlib
+import re
 from xml.etree.cElementTree import dump
 
+RE_DEL_ARTICLE = re.compile('/GA[\d]{1,2}')
+RE_SPEEDY_DELETION = re.compile('\{\{db\-[a-z\d]*\}\}') #http://en.wikipedia.org/wiki/Wikipedia:Criteria_for_speedy_deletion
 
 def validate_hostname(address):
     '''
@@ -68,6 +71,14 @@
     return title.text
 
 
+def detect_speedy_deletion(revision_text):
+    spds = re.findall(RE_SPEEDY_DELETION, revision_text)
+    templates = {}
+    for spd in spds:
+        templates[spd] = 1
+    return templates
+
+
 def parse_title_meta_data(title, ns, namespaces):
     '''
     This function categorizes an article to assist the Wikimedia Taxonomy
@@ -75,6 +86,7 @@
     http://meta.wikimedia.org/wiki/Contribution_Taxonomy_Project/Research_Questions
     '''
     title_meta = {}
+    re_ga = re.compile('/GA[\d]')
     if not ns:
         return title_meta
     namespace = '%s:' % namespaces[ns]
@@ -83,10 +95,11 @@
     title_meta['ns'] = ns
     if title.startswith('List of'):
         title_meta['category'] = 'List'
+    elif ns == 1:
+        if re.search(RE_DEL_ARTICLE, title.find):
+            title_meta['category'] = 'Good Article'
     elif ns == 4 or ns == 5:
-        if title.find('Articles for deletion') > -1:
-            title_meta['category'] = 'Deletion'
-        elif title.find('Arbitration') > -1:
+        if title.find('Arbitration') > -1:
             title_meta['category'] = 'Arbitration'
         elif title.find('Good Article') > -1:
             title_meta['category'] = 'Good Article'
@@ -108,6 +121,8 @@
                 title_meta['category'] = 'Featured Portal'
             elif title.find('Featured topic candidates') > -1:
                 title_meta['category'] = 'Featured Topic'
+        elif title.find('Articles for deletion') > -1 and title.find('Articles for deletion/Log/') > -1:
+            title_meta['category'] = 'Deletion'
 
     #print title_meta
     return title_meta


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to