http://www.mediawiki.org/wiki/Special:Code/MediaWiki/90015

Revision: 90015
Author:   rfaulk
Date:     2011-06-13 22:06:19 +0000 (Mon, 13 Jun 2011)
Log Message:
-----------
Modified the mine_squid_landing_page_requests and evaluate_landing_url methods
to properly handle mining requests.
Renamed the FundraiserDataMapper db and cursor members (_db -> _db_,
_cur -> _cur_) for consistency with the other member names.

Modified Paths:
--------------
    trunk/fundraiser-statistics/fundraiser-scripts/classes/DataMapper.py

Modified: trunk/fundraiser-statistics/fundraiser-scripts/classes/DataMapper.py
===================================================================
--- trunk/fundraiser-statistics/fundraiser-scripts/classes/DataMapper.py        
2011-06-13 22:02:30 UTC (rev 90014)
+++ trunk/fundraiser-statistics/fundraiser-scripts/classes/DataMapper.py        
2011-06-13 22:06:19 UTC (rev 90015)
@@ -131,8 +131,8 @@
 """
 class FundraiserDataMapper(DataMapper):
     
-    _db = None
-    _cur = None
+    _db_ = None
+    _cur_ = None
     
     _impression_table_name_ = 'banner_impressions'
     _landing_page_table_name_ = 'landing_page_requests'
@@ -146,13 +146,13 @@
     
     """ !! MODIFY -- use dataloaders! """ 
     def _init_db(self):     
-        self._db = MySQLdb.connect(host='127.0.0.1', user='rfaulk', 
db='faulkner', port=3307)
-        self._cur = self._db.cursor()
+        self._db_ = MySQLdb.connect(host='127.0.0.1', user='rfaulk', 
db='faulkner', port=3307)
+        self._cur_ = self._db_.cursor()
         
     """ !! MODIFY -- use dataloaders! """ 
     def _close_db(self):
-        self._cur.close()
-        self._db.close()
+        self._cur_.close()
+        self._db_.close()
         
         
     
@@ -168,7 +168,7 @@
             deleteStmnt = 'delete from ' + self._landing_page_table_name_ + ' 
where start_timestamp = \'' + timestamp + '\';'
         
         try:
-            self._cur.execute(deleteStmnt)
+            self._cur_.execute(deleteStmnt)
             print >> sys.stdout, "Executed delete from impression: " + 
deleteStmnt
         except:
             print >> sys.stderr, "Could not execute delete:\n" + deleteStmnt + 
"\nResuming insert ..."
@@ -356,9 +356,9 @@
                                     val = '(' + start_timestamp_in + ',\'' + 
banner + '\',\'' + project + '\',\'' + country + '\',\'' + lang + '\',' \
                                     + str(count) + ',' + time_stamp_in + ');'
 
-                                    self._cur.execute(insertStmt + val)
+                                    self._cur_.execute(insertStmt + val)
                                 except:
-                                    self._db.rollback()
+                                    self._db_.rollback()
                                     sys.exit("Database Interface Exception - 
Could not execute statement:\n" + insertStmt + val)
     
                 # Re-initialize counts
@@ -400,8 +400,6 @@
         start_timestamp_in = "convert(\'" + start + "\', datetime)"
         curr_time = TP.timestamp_from_obj(datetime.datetime.now(),1,3)
         
-        count_parse = 0
-        
         """ retrieve the start time of the log """
         start = self.get_first_timestamp_from_log(logFileName)
         
@@ -547,10 +545,21 @@
                 landing_url = lineArgs[8]
             except IndexError:
                 landing_url = 'Unavailable'
-    
-            include_request, index_str_flag = 
self.evaluate_landing_url(landing_url)
+                
+            hostIndex = 1
+            queryIndex = 4
+            pathIndex = 2
             
-           
+            parsed_landing_url = up.urlparse(landing_url)
+            query_fields = cgi.parse_qs(parsed_landing_url[queryIndex]) # Get 
the banner name and lang
+            path_pieces = parsed_landing_url[pathIndex].split('/')
+
+            #print ''
+            #print landing_url
+            include_request, index_str_flag = 
self.evaluate_landing_url(landing_url, parsed_landing_url, query_fields, 
path_pieces)
+            #print [include_request, index_str_flag]
+            
+            
             if include_request:
                 
                 """ Address cases where the query string contains the landing 
page - ...wikimediafoundation.org/w/index.php?... """
@@ -573,7 +582,7 @@
                             
                     except:
                         landing_page = 'NONE'
-                        country = Hlp.localize_IP(self._cur, ip_add)
+                        country = Hlp.localize_IP(self._cur_, ip_add)
                         
                 else: 
                     """ Address cases where the query string does not contain 
the landing page - ...wikimediafoundation.org/wiki/... """
@@ -597,11 +606,11 @@
                                 country = landing_path[3]
                                 
                         except:
-                            country =  Hlp.localize_IP(self._cur, ip_add) 
+                            country =  Hlp.localize_IP(self._cur_, ip_add) 
                 
                 # If country is confused with the language use the ip
                 if country == country.lower():
-                    country = Hlp.localize_IP(self._cur, ip_add) 
+                    country = Hlp.localize_IP(self._cur_, ip_add) 
                                 
                 # ensure fields exist
                 try:
@@ -621,8 +630,8 @@
                     + project + '\',\'' +  ip_add + '\',' + 'convert(\'' + 
timestamp_string + '\', datetime)' + ');'
                     
                     #print insertStmt + val
-                    self._cur.execute(insertStmt_lp + val)
-    
+                    self._cur_.execute(insertStmt_lp + val)
+                    
                 except:
                     print "Could not insert:\n" + insertStmt_lp + val
                     pass
@@ -695,16 +704,12 @@
     """
         Parses the landing url and determines if its valid
     """
-    def evaluate_landing_url(self, landing_url):        
+    def evaluate_landing_url(self, landing_url, parsed_landing_url, 
query_fields, path_pieces):        
         
         hostIndex = 1
         queryIndex = 4
         pathIndex = 2
 
-        parsed_landing_url = up.urlparse(landing_url)
-        query_fields = cgi.parse_qs(parsed_landing_url[queryIndex]) # Get the 
banner name and lang
-        path_pieces = parsed_landing_url[pathIndex].split('/')
-        
         """ 
             Filter the landing URLs
         
@@ -714,11 +719,11 @@
             Evaluate conditions which determine acceptance of request based on 
the landing url 
         """
         try: 
+            c1 = re.search('WMF', path_pieces[2] ) != None or 
re.search('Junetesting001', path_pieces[2] ) != None 
+            c2 = re.search('Hear_from_Kartika', path_pieces[2]) != None
             
-            c1 = re.search('WMF', path_pieces[2] ) != None or 
re.search('Junetesting001', path_pieces[2] ) != None 
-            c2 = re.search('Hear_from_Kartika', path_pieces[2])   != None
             cond1 = parsed_landing_url[hostIndex] == 'wikimediafoundation.org' 
and path_pieces[1] == 'wiki' and (c1 or c2)
-            
+
             c1 = re.search('index.php', path_pieces[2] )  != None
             index_str_flag = c1
             
@@ -727,16 +732,17 @@
             except KeyError:
                 c2 = 0
             cond2 = (parsed_landing_url[hostIndex] == 
'wikimediafoundation.org' and path_pieces[1] == 'w' and c1 and c2)
-            
-            if cond2:
-                count_parse = count_parse + 1
-                
+                            
             regexp_res = re.search('Special:LandingCheck',landing_url)
             cond3 = (regexp_res == None)
             
             return [(cond1 or cond2) and cond3, index_str_flag]
              
-        except: 
-            return [0, 0]
+        except Exception as e: 
+            #print type(e)     # the exception instance
+            #print e.args      # arguments stored in .args
+            #print e           # __str__ allows args to printed directly
+            
+            return [False, False]
 
     
\ No newline at end of file


_______________________________________________
MediaWiki-CVS mailing list
mediawiki-cvs@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to