jenkins-bot has submitted this change and it was merged.

Change subject: Log in new format compatible with avro schema
......................................................................


Log in new format compatible with avro schema

Logs into the new format, which will stream into Kafka and then
into Hadoop. This should cover the use cases of both existing
old logging streams, so ideally those get removed in a followup.
They can't be cleaned up yet, though: those logs are needed until
the transition to this new pipeline is complete.

Sampling the logs or turning them on/off is handled in the
mediawiki-config repository. Additionally, that is where
the Avro schema lives.

Related mediawiki-config patch: Ib1a4743
Related refinery-source patch: I8112b2c9b

Bug: T103505
Change-Id: If280816ac13df94bd123097d2215ab679e753812
---
M includes/ElasticsearchIntermediary.php
M includes/Searcher.php
2 files changed, 80 insertions(+), 9 deletions(-)

Approvals:
  Cindy-the-browser-test-bot: Looks good to me, but someone else must approve
  DCausse: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/ElasticsearchIntermediary.php 
b/includes/ElasticsearchIntermediary.php
index eb11b25..3ada45d 100644
--- a/includes/ElasticsearchIntermediary.php
+++ b/includes/ElasticsearchIntermediary.php
@@ -74,9 +74,9 @@
        static private $executionId;
 
        /**
-        * @var array[]|null Result of self::getLogContext for each request in 
this process
+        * @var array[] Result of self::getLogContext for each request in this 
process
         */
-       static private $logContexts;
+       static private $logContexts = array();
 
        /**
         * Constructor.
@@ -116,11 +116,80 @@
         * Only public due to php 5.3 not having access from closures
         */
        public static function reportLogContexts() {
-               global $wgRequest;
-
                if ( !self::$logContexts ) {
                        return;
                }
+               self::buildRequestSetLog();
+               self::buildUserTestingLog();
+               self::$logContexts = array();
+       }
+
+       /**
+        * Builds and ships a log context that is serialized to an avro
+        * schema. Avro is very specific that all fields must be defined,
+        * even if they have a default, and that types must match exactly.
+        * "5" is not an int as much as php would like it to be.
+        *
+        * Avro will happily ignore fields that are present but not used. To
+        * add new fields to the schema they must first be added here and
+        * deployed. Then the schema can be updated. Removing goes in reverse,
+        * adjust the schema to ignore the column, then deploy code no longer
+        * providing it.
+        */
+       private static function buildRequestSetLog() {
+               global $wgRequest;
+
+               // for the moment these are still created in the old format to 
serve
+               // the old log formats, so here we transform the context into 
the new
+               // request format. At some point the context should just be 
created in
+               // the correct format.
+               $requests = array();
+               foreach ( self::$logContexts as $context ) {
+                       $request = array(
+                               'query' => isset( $context['query'] ) ? 
(string) $context['query'] : '',
+                               'queryType' => isset( $context['queryType'] ) ? 
(string) $context['queryType'] : '',
+                               // populated below
+                               'indices' => array(),
+                               'tookMs' => isset( $context['tookMs'] ) ? (int) 
$context['tookMs'] : -1,
+                               'elasticTookMs' => isset( 
$context['elasticTookMs'] ) ? (int) $context['elasticTookMs'] : -1,
+                               'limit' => isset( $context['limit'] ) ? (int) 
$context['limit'] : -1,
+                               'hitsTotal' => isset( $context['hitsTotal'] ) ? 
(int) $context['hitsTotal'] : -1,
+                               'hitsReturned' => isset( 
$context['hitsReturned'] ) ? (int) $context['hitsReturned'] : -1,
+                               'hitsOffset' => isset( $context['hitsOffset'] ) 
? (int) $context['hitsOffset'] : -1,
+                               // populated below
+                               'namespaces' => array(),
+                               'suggestion' => isset( $context['suggestion'] ) 
? (string) $context['suggestion'] : '',
+                               'suggestionRequested' => isset( 
$context['suggestion'] )
+                       );
+
+                       if ( isset( $context['index'] ) ) {
+                               $request['indices'][] = $context['index'];
+                       }
+                       if ( isset( $context['namespaces'] ) ) {
+                               foreach ( $context['namespaces'] as $id ) {
+                                       $request['namespaces'][] = (int) $id;
+                               }
+                       }
+                       $requests[] = $request;
+               }
+
+               $requestSet = array(
+                       'timestamp' => time(),
+                       'wikiId' => wfWikiId(),
+                       'source' => self::getExecutionContext(),
+                       'identity' => self::generateIdentToken(),
+                       'ip' => $wgRequest->getIP() ?: '',
+                       'userAgent' => $wgRequest->getHeader( 'User-Agent') ?: 
'',
+                       'backendUserTests' => 
UserTesting::getInstance()->getActiveTestNames(),
+                       'requests' => $requests,
+               );
+
+               LoggerFactory::getInstance( 'CirrusSearchRequestSet' )->debug( 
'', $requestSet );
+       }
+
+       private static function buildUserTestingLog() {
+               global $wgRequest;
+
                $ut = UserTesting::getInstance();
                if ( !$ut->getActiveTestNames() ) {
                        return;
@@ -161,14 +230,12 @@
                        FormatJson::encode( $parameters ),
                );
 
-               $tests = array();
                $logger = LoggerFactory::getInstance( 'CirrusSearchUserTesting' 
);
                foreach ( $ut->getActiveTestNames() as $test ) {
                        $bucket = $ut->getBucket( $test );
                        $message[1] = "{$test}-{$bucket}";
                        $logger->debug( implode( "\t", $message ) );
                }
-               self::$logContexts = null;
        }
 
        /**
@@ -360,7 +427,7 @@
                        $namespaces = implode( ', ', $context['namespaces'] );
                        $message .= " within these namespaces: $namespaces";
                }
-               if ( isset( $context['suggestion'] ) ) {
+               if ( isset( $context['suggestion'] ) && strlen( 
$context['suggestion'] ) > 0 ) {
                        $message .= " and suggested '{suggestion}'";
                }
                $message .= ". Requested via {source} for {identity} by 
executor {executor}";
@@ -418,11 +485,10 @@
                        }
                }
 
-               if ( self::$logContexts === null ) {
+               if ( count( self::$logContexts ) === 0 ) {
                        DeferredUpdates::addCallableUpdate( function () {
                                ElasticsearchIntermediary::reportLogContexts();
                        } );
-                       self::$logContexts = array();
                }
                self::$logContexts[] = $params;
 
diff --git a/includes/Searcher.php b/includes/Searcher.php
index b98e8e3..a5d457b 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -1427,6 +1427,11 @@
                $logContext = array(
                        'queryType' => $type,
                        'query' => $for,
+                       'limit' => $this->limit ?: null,
+                       // null means not requested, '' means not found. If 
found
+                       // parent::buildLogContext will replace the '' with an
+                       // actual suggestion.
+                       'suggestion' => $this->suggest ? '' : null,
                );
 
                if ( $this->returnQuery ) {

-- 
To view, visit https://gerrit.wikimedia.org/r/240041
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If280816ac13df94bd123097d2215ab679e753812
Gerrit-PatchSet: 12
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
Gerrit-Reviewer: Cindy-the-browser-test-bot <[email protected]>
Gerrit-Reviewer: DCausse <[email protected]>
Gerrit-Reviewer: EBernhardson <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to