Aaron Schulz has submitted this change and it was merged.

Change subject: Add better error logging for DB getLag() calls
......................................................................


Add better error logging for DB getLag() calls

Bug: T32257
Change-Id: I4ea5db670fe96d20b1d593cc2d759f9c3f570790
---
M includes/db/DatabaseMysqlBase.php
M includes/db/loadbalancer/LoadBalancer.php
M includes/db/loadbalancer/LoadMonitorMySQL.php
3 files changed, 45 insertions(+), 13 deletions(-)

Approvals:
  BryanDavis: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/db/DatabaseMysqlBase.php 
b/includes/db/DatabaseMysqlBase.php
index 5bcfd66..3a8f737 100644
--- a/includes/db/DatabaseMysqlBase.php
+++ b/includes/db/DatabaseMysqlBase.php
@@ -654,6 +654,13 @@
        protected function getLagFromPtHeartbeat() {
                $masterInfo = $this->getMasterServerInfo();
                if ( !$masterInfo ) {
+                       wfLogDBError(
+                               "Unable to query master of {db_server} for 
server ID",
+                               $this->getLogContext( array(
+                                       'method' => __METHOD__
+                               ) )
+                       );
+
                        return false; // could not get master server ID
                }
 
@@ -666,6 +673,13 @@
                        return max( $nowUnix - $timeUnix, 0.0 );
                }
 
+               wfLogDBError(
+                       "Unable to find pt-heartbeat row for {db_server}",
+                       $this->getLogContext( array(
+                               'method' => __METHOD__
+                       ) )
+               );
+
                return false;
        }
 
diff --git a/includes/db/loadbalancer/LoadBalancer.php 
b/includes/db/loadbalancer/LoadBalancer.php
index 85a6bf0..3fe9638 100644
--- a/includes/db/loadbalancer/LoadBalancer.php
+++ b/includes/db/loadbalancer/LoadBalancer.php
@@ -190,11 +190,13 @@
                                if ( isset( $this->mServers[$i]['max lag'] ) ) {
                                        $maxServerLag = min( $maxServerLag, 
$this->mServers[$i]['max lag'] );
                                }
+
+                               $host = $this->getServerName( $i );
                                if ( $lag === false ) {
-                                       wfDebugLog( 'replication', "Server #$i 
is not replicating" );
+                                       wfDebugLog( 'replication', "Server 
$host (#$i) is not replicating?" );
                                        unset( $loads[$i] );
                                } elseif ( $lag > $maxServerLag ) {
-                                       wfDebugLog( 'replication', "Server #$i 
is excessively lagged ($lag seconds)" );
+                                       wfDebugLog( 'replication', "Server 
$host (#$i) has >= $lag seconds of lag" );
                                        unset( $loads[$i] );
                                }
                        }
@@ -679,9 +681,7 @@
         *
         * @param int $i Server index
         * @param string|bool $wiki Wiki ID, or false for the current wiki
-        * @return DatabaseBase
-        *
-        * @access private
+        * @return DatabaseBase|bool Returns false on errors
         */
        public function openConnection( $i, $wiki = false ) {
                if ( $wiki !== false ) {
diff --git a/includes/db/loadbalancer/LoadMonitorMySQL.php 
b/includes/db/loadbalancer/LoadMonitorMySQL.php
index 31f6163..e251501 100644
--- a/includes/db/loadbalancer/LoadMonitorMySQL.php
+++ b/includes/db/loadbalancer/LoadMonitorMySQL.php
@@ -88,18 +88,33 @@
 
                $lagTimes = array();
                foreach ( $serverIndexes as $i ) {
-                       if ( $i == 0 ) { # Master
-                               $lagTimes[$i] = 0;
+                       if ( $i == $this->parent->getWriterIndex() ) {
+                               $lagTimes[$i] = 0; // master always has no lag
                                continue;
                        }
+
                        $conn = $this->parent->getAnyOpenConnection( $i );
-                       if ( $conn !== false ) {
-                               $lagTimes[$i] = $conn->getLag();
+                       if ( $conn ) {
+                               $close = false; // already open
+                       } else {
+                               $conn = $this->parent->openConnection( $i, 
$wiki );
+                               $close = true; // new connection
+                       }
+
+                       if ( !$conn ) {
+                               $lagTimes[$i] = false;
+                               $host = $this->parent->getServerName( $i );
+                               wfDebugLog( 'replication', __METHOD__ . ": host 
$host (#$i) is unreachable" );
                                continue;
                        }
-                       $conn = $this->parent->openConnection( $i, $wiki );
-                       if ( $conn !== false ) {
-                               $lagTimes[$i] = $conn->getLag();
+
+                       $lagTimes[$i] = $conn->getLag();
+                       if ( $lagTimes[$i] === false ) {
+                               $host = $this->parent->getServerName( $i );
+                               wfDebugLog( 'replication', __METHOD__ . ": host 
$host (#$i) is not replicating?" );
+                       }
+
+                       if ( $close ) {
                                # Close the connection to avoid sleeper 
connections piling up.
                                # Note that the caller will pick one of these 
DBs and reconnect,
                                # which is slightly inefficient, but this only 
matters for the lag
@@ -124,7 +139,10 @@
        }
 
        private function getLagTimeCacheKey() {
+               $writerIndex = $this->parent->getWriterIndex();
                // Lag is per-server, not per-DB, so key on the master DB name
-               return $this->srvCache->makeGlobalKey( 'lag-times', 
$this->parent->getServerName( 0 ) );
+               return $this->srvCache->makeGlobalKey(
+                       'lag-times', $this->parent->getServerName( $writerIndex 
)
+               );
        }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/264015
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I4ea5db670fe96d20b1d593cc2d759f9c3f570790
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Aaron Schulz <[email protected]>
Gerrit-Reviewer: BryanDavis <[email protected]>
Gerrit-Reviewer: Parent5446 <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to