Awight has submitted this change and it was merged.
Change subject: Count more hide reasons and update log parsing
......................................................................
Count more hide reasons and update log parsing
The updated regex is apparently needed following changes in the
parameters of udp-filter (which provides the upstream data). The new
hide reasons come from changes in CentralNotice.
Change-Id: I36eeedacc400289dcf586bc4a7cf71ee5f33501b
---
M fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
M fundraiser/analytics/regex.py
2 files changed, 32 insertions(+), 10 deletions(-)
Approvals:
Awight: Verified; Looks good to me, approved
diff --git
a/fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
b/fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
index 68aa723..0c0ba08 100644
---
a/fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
+++
b/fundraiser/analytics/management/commands/LoadBannerImpressions2Aggregate.py
@@ -175,10 +175,21 @@
"ignored" : 0,
"error" : 0,
"ignore_because": {
+
+ # Coordinate with window.insertBanner() and
window.hideBanner() in CentralNotice
+ #
modules/ext.centralNotice.bannerController/bannerController.js,
+ #
https://www.mediawiki.org/wiki/Extension:CentralNotice/Special:RecordImpression,
+ # and reasons set up in this method.
+ # Note also that "hidecookie" and "hideempty" come from
the values "cookie" and
+ # "empty" respectively.
"file": 0,
"client": 0,
"hidecookie": 0,
"hideempty": 0,
+ "preload": 0,
+ "alterImpressionData": 0,
+ "close": 0,
+ "donate": 0,
"other": 0
}
}
@@ -297,13 +308,24 @@
continue
if "result" in qs and qs["result"][0] == "hide":
results["impression"]["ignored"] += 1
- if "reason" in qs and qs["reason"][0] ==
"cookie":
-
results["impression"]["ignore_because"]["hidecookie"] += 1
- elif "reason" in qs and qs["reason"][0] ==
"empty":
-
results["impression"]["ignore_because"]["hideempty"] += 1
- else:
-
results["impression"]["ignore_because"]["other"] += 1
+
+ if "reason" in qs:
+
+ # Switch "cookie" to "hidecookie" and
"empty" to "hideempty"
+ # for consistency with legacy reasons in
the database
+ reason = qs["reason"][0]
+ if reason == "cookie":
+ reason = "hidecookie"
+ if reason == "empty":
+ reason = "hideempty"
+
+ if reason in
results["impression"]["ignore_because"]:
+
results["impression"]["ignore_because"][reason] += 1
+ else:
+
results["impression"]["ignore_because"]["other"] += 1
+
continue
+
results["impression"]["error"] += 1
if self.verbose:
self.logger.exception("** INVALID BANNER
IMPRESSION - NOT ENOUGH DATA TO RECORD **")
diff --git a/fundraiser/analytics/regex.py b/fundraiser/analytics/regex.py
index 0b4fb49..5a0fca3 100644
--- a/fundraiser/analytics/regex.py
+++ b/fundraiser/analytics/regex.py
@@ -3,7 +3,7 @@
# Regex based on http://wikitech.wikimedia.org/view/Squid_log_format
squidline = re.compile(
r"""
- (?P<squid>[\S]+) # Name of the squid server
+ ^(?P<squid>[\S]+) # Name of the squid server
\s[-]*
(?P<sequence>[0-9]+) # Sequence ID from the squid server
\s
@@ -15,7 +15,7 @@
\s
(?P<squidstatus>[\S]+) # Squid request status and HTTP status code
\s
- (?P<reply>[0-9]+) # Reply size including HTTP headers
+ (?P<reply>[0-9|-]+) # Reply size including HTTP headers
\s
(?P<request>[\S]+) # Request type
\s
@@ -27,13 +27,13 @@
\s
(?P<referrer>[\S]+) # Referer header
\s
- (?P<xff>[\S]+) # X-Forwarded-For header
+ (?P<xff>[\S]+)? # X-Forwarded-For header
\s
(?P<useragent>[\S\s]+) # User-Agent header
\s
(?P<acceptlanguage>[\S\s]+) # Accept-Language header
\s
- (?P<xcarrier>[\S\s]+) # X-carrier header
+ (?P<xcarrier>[\S\s]+)$ # X-carrier header
""", re.VERBOSE
)
--
To view, visit https://gerrit.wikimedia.org/r/168343
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I36eeedacc400289dcf586bc4a7cf71ee5f33501b
Gerrit-PatchSet: 3
Gerrit-Project: wikimedia/fundraising/tools/DjangoBannerStats
Gerrit-Branch: master
Gerrit-Owner: AndyRussG <[email protected]>
Gerrit-Reviewer: AndyRussG <[email protected]>
Gerrit-Reviewer: Awight <[email protected]>
Gerrit-Reviewer: Ejegg <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits