Addshore has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/255464

Change subject: api log scanner format whitelist
......................................................................

api log scanner format whitelist

Also move the validity checks (property IDs must start with 'P',
formats must be in the whitelist) to just before the metrics are
sent to Graphite.

Change-Id: Ic701035a06d66d6b357d3694ec426b263069ec38
---
M graphite/api/logScanner.php
1 file changed, 19 insertions(+), 6 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/limn-wikidata-data 
refs/changes/64/255464/1

diff --git a/graphite/api/logScanner.php b/graphite/api/logScanner.php
index 8a90541..81283e2 100644
--- a/graphite/api/logScanner.php
+++ b/graphite/api/logScanner.php
@@ -18,6 +18,17 @@
        private $dayAfter;
        private $targetDate;
 
+       private $formatWhitelist = array(
+               'dbg', 'dbgfm',
+               'json', 'jsonfm',
+               'php', 'phpfm',
+               'raw', 'rawfm',
+               'txt', 'txtfm',
+               'xml', 'xmlfm',
+               'yaml', 'yamlfm',
+               'none',
+       );
+
        /**
         * @param string $targetDate must be parse-able by PHP
         */
@@ -66,9 +77,7 @@
                                                        // Extract the property 
(if set)
                                                        if( $propertyStart = ( 
strpos( $line, ' property=' ) + 10 ) ) {
                                                                $property = 
strtoupper( substr( $line, $propertyStart, strpos( $line, ' ', $propertyStart ) 
- $propertyStart ) );
-                                                               if( strpos( 
$property, 'P' ) === 0 ) {
-                                                                       
@$counters['wbgetclaims.properties'][$property]++;
-                                                               }
+                                                               
@$counters['wbgetclaims.properties'][$property]++;
                                                        }
 
                                                }
@@ -79,9 +88,7 @@
                                // Extract the format (if set)
                                if( $formatStart = ( strpos( $line, ' format=' 
) + 8 ) ) {
                                        $format = strtolower( substr( $line, 
$formatStart, strpos( $line, ' ', $formatStart ) - $formatStart ) );
-                                       if( $format !== '' ) {
-                                               
@$counters['formats'][$format]++;
-                                       }
+                                       @$counters['formats'][$format]++;
                                }
 
                        }
@@ -91,6 +98,12 @@
                // Send everything to graphite!
                foreach( $counters as $name => $counter ) {
                        foreach( $counter as $key => $value ) {
+                               if(
+                                       ( $name == 'wbgetclaims.properties' && 
strpos( $key, 'P' ) !== 0 ) ||
+                                       ( $name == 'formats' && !in_array( 
$key, $this->formatWhitelist ) )
+                               ) {
+                                       continue;
+                               }
                                $this->sendMetric(
                                        'daily.wikidata.api.' . $name . '.' . 
$key,
                                        $value

-- 
To view, visit https://gerrit.wikimedia.org/r/255464
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic701035a06d66d6b357d3694ec426b263069ec38
Gerrit-PatchSet: 1
Gerrit-Project: analytics/limn-wikidata-data
Gerrit-Branch: master
Gerrit-Owner: Addshore <addshorew...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to