Awight has submitted this change and it was merged.

Change subject: Count more hide reasons and update log parsing
......................................................................


Count more hide reasons and update log parsing

The updated regex is apparently needed following changes in the
udp-filter parameters (that provides upstream data). The new hide
reasons come from changes in CentralNotice.

Change-Id: I36eeedacc400289dcf586bc4a7cf71ee5f33501b
---
M fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
M fundraiser/analytics/regex.py
2 files changed, 32 insertions(+), 10 deletions(-)

Approvals:
  Awight: Verified; Looks good to me, approved



diff --git a/fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py b/fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
index 68aa723..0c0ba08 100644
--- a/fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
+++ b/fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
@@ -175,10 +175,21 @@
                 "ignored" : 0,
                 "error" : 0,
                 "ignore_because": {
+
+                    # Coordinate with window.insertBanner() and window.hideBanner() in CentralNotice
+                    # modules/ext.centralNotice.bannerController/bannerController.js,
+                    # https://www.mediawiki.org/wiki/Extension:CentralNotice/Special:RecordImpression,
+                    # and reasons set up in this method.
+                    # Note also that "hidecookie" and "hideempty" come from the values "cookie" and
+                    # "empty" respectively.
                     "file": 0,
                     "client": 0,
                     "hidecookie": 0,
                     "hideempty": 0,
+                    "preload": 0,
+                    "alterImpressionData": 0,
+                    "close": 0,
+                    "donate": 0,
                     "other": 0
                 }
             }
@@ -297,13 +308,24 @@
                                 continue
                             if "result" in qs and qs["result"][0] == "hide":
                                 results["impression"]["ignored"] += 1
-                                if "reason" in qs and qs["reason"][0] == "cookie":
-                                    results["impression"]["ignore_because"]["hidecookie"] += 1
-                                elif "reason" in qs and qs["reason"][0] == "empty":
-                                    results["impression"]["ignore_because"]["hideempty"] += 1
-                                else:
-                                    results["impression"]["ignore_because"]["other"] += 1
+
+                                if "reason" in qs:
+
+                                    # Switch "cookie" to "hidecookie" and "empty" to "hideempty"
+                                    # for consistency with legacy reasons in the database
+                                    reason = qs["reason"][0]
+                                    if reason == "cookie":
+                                        reason = "hidecookie"
+                                    if reason == "empty":
+                                        reason = "hideempty"
+
+                                    if reason in results["impression"]["ignore_because"]:
+                                        results["impression"]["ignore_because"][reason] += 1
+                                    else:
+                                        results["impression"]["ignore_because"]["other"] += 1
+
                                 continue
+
                             results["impression"]["error"] += 1
                             if self.verbose:
                                 self.logger.exception("** INVALID BANNER IMPRESSION - NOT ENOUGH DATA TO RECORD **")
diff --git a/fundraiser/analytics/regex.py b/fundraiser/analytics/regex.py
index 0b4fb49..5a0fca3 100644
--- a/fundraiser/analytics/regex.py
+++ b/fundraiser/analytics/regex.py
@@ -3,7 +3,7 @@
 # Regex based on http://wikitech.wikimedia.org/view/Squid_log_format
 squidline = re.compile(
     r"""
-        (?P<squid>[\S]+) # Name of the squid server
+        ^(?P<squid>[\S]+) # Name of the squid server
         \s[-]*
         (?P<sequence>[0-9]+) # Sequence ID from the squid server
         \s
@@ -15,7 +15,7 @@
         \s
         (?P<squidstatus>[\S]+) # Squid request status and HTTP status code
         \s
-        (?P<reply>[0-9]+) # Reply size including HTTP headers
+        (?P<reply>[0-9|-]+) # Reply size including HTTP headers
         \s
         (?P<request>[\S]+) # Request type
         \s
@@ -27,13 +27,13 @@
         \s
         (?P<referrer>[\S]+) # Referer header
         \s
-        (?P<xff>[\S]+) # X-Forwarded-For header
+        (?P<xff>[\S]+)? # X-Forwarded-For header
         \s
         (?P<useragent>[\S\s]+) # User-Agent header
         \s
         (?P<acceptlanguage>[\S\s]+) # Accept-Language header
         \s
-        (?P<xcarrier>[\S\s]+) # X-carrier header
+        (?P<xcarrier>[\S\s]+)$ # X-carrier header
     """, re.VERBOSE
 )
 

-- 
To view, visit https://gerrit.wikimedia.org/r/168343
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I36eeedacc400289dcf586bc4a7cf71ee5f33501b
Gerrit-PatchSet: 3
Gerrit-Project: wikimedia/fundraising/tools/DjangoBannerStats
Gerrit-Branch: master
Gerrit-Owner: AndyRussG <[email protected]>
Gerrit-Reviewer: AndyRussG <[email protected]>
Gerrit-Reviewer: Awight <[email protected]>
Gerrit-Reviewer: Ejegg <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to