Aaron Schulz has submitted this change and it was merged.
Change subject: Add better error logging for DB getLag() calls
......................................................................
Add better error logging for DB getLag() calls
Bug: T32257
Change-Id: I4ea5db670fe96d20b1d593cc2d759f9c3f570790
---
M includes/db/DatabaseMysqlBase.php
M includes/db/loadbalancer/LoadBalancer.php
M includes/db/loadbalancer/LoadMonitorMySQL.php
3 files changed, 45 insertions(+), 13 deletions(-)
Approvals:
BryanDavis: Looks good to me, approved
jenkins-bot: Verified
diff --git a/includes/db/DatabaseMysqlBase.php
b/includes/db/DatabaseMysqlBase.php
index 5bcfd66..3a8f737 100644
--- a/includes/db/DatabaseMysqlBase.php
+++ b/includes/db/DatabaseMysqlBase.php
@@ -654,6 +654,13 @@
protected function getLagFromPtHeartbeat() {
$masterInfo = $this->getMasterServerInfo();
if ( !$masterInfo ) {
+ wfLogDBError(
+ "Unable to query master of {db_server} for
server ID",
+ $this->getLogContext( array(
+ 'method' => __METHOD__
+ ) )
+ );
+
return false; // could not get master server ID
}
@@ -666,6 +673,13 @@
return max( $nowUnix - $timeUnix, 0.0 );
}
+ wfLogDBError(
+ "Unable to find pt-heartbeat row for {db_server}",
+ $this->getLogContext( array(
+ 'method' => __METHOD__
+ ) )
+ );
+
return false;
}
diff --git a/includes/db/loadbalancer/LoadBalancer.php
b/includes/db/loadbalancer/LoadBalancer.php
index 85a6bf0..3fe9638 100644
--- a/includes/db/loadbalancer/LoadBalancer.php
+++ b/includes/db/loadbalancer/LoadBalancer.php
@@ -190,11 +190,13 @@
if ( isset( $this->mServers[$i]['max lag'] ) ) {
$maxServerLag = min( $maxServerLag,
$this->mServers[$i]['max lag'] );
}
+
+ $host = $this->getServerName( $i );
if ( $lag === false ) {
- wfDebugLog( 'replication', "Server #$i
is not replicating" );
+ wfDebugLog( 'replication', "Server
$host (#$i) is not replicating?" );
unset( $loads[$i] );
} elseif ( $lag > $maxServerLag ) {
- wfDebugLog( 'replication', "Server #$i
is excessively lagged ($lag seconds)" );
+ wfDebugLog( 'replication', "Server
$host (#$i) has >= $lag seconds of lag" );
unset( $loads[$i] );
}
}
@@ -679,9 +681,7 @@
*
* @param int $i Server index
* @param string|bool $wiki Wiki ID, or false for the current wiki
- * @return DatabaseBase
- *
- * @access private
+ * @return DatabaseBase|bool Returns false on errors
*/
public function openConnection( $i, $wiki = false ) {
if ( $wiki !== false ) {
diff --git a/includes/db/loadbalancer/LoadMonitorMySQL.php
b/includes/db/loadbalancer/LoadMonitorMySQL.php
index 31f6163..e251501 100644
--- a/includes/db/loadbalancer/LoadMonitorMySQL.php
+++ b/includes/db/loadbalancer/LoadMonitorMySQL.php
@@ -88,18 +88,33 @@
$lagTimes = array();
foreach ( $serverIndexes as $i ) {
- if ( $i == 0 ) { # Master
- $lagTimes[$i] = 0;
+ if ( $i == $this->parent->getWriterIndex() ) {
+ $lagTimes[$i] = 0; // master always has no lag
continue;
}
+
$conn = $this->parent->getAnyOpenConnection( $i );
- if ( $conn !== false ) {
- $lagTimes[$i] = $conn->getLag();
+ if ( $conn ) {
+ $close = false; // already open
+ } else {
+ $conn = $this->parent->openConnection( $i,
$wiki );
+ $close = true; // new connection
+ }
+
+ if ( !$conn ) {
+ $lagTimes[$i] = false;
+ $host = $this->parent->getServerName( $i );
+ wfDebugLog( 'replication', __METHOD__ . ": host
$host (#$i) is unreachable" );
continue;
}
- $conn = $this->parent->openConnection( $i, $wiki );
- if ( $conn !== false ) {
- $lagTimes[$i] = $conn->getLag();
+
+ $lagTimes[$i] = $conn->getLag();
+ if ( $lagTimes[$i] === false ) {
+ $host = $this->parent->getServerName( $i );
+ wfDebugLog( 'replication', __METHOD__ . ": host
$host (#$i) is not replicating?" );
+ }
+
+ if ( $close ) {
# Close the connection to avoid sleeper
connections piling up.
# Note that the caller will pick one of these
DBs and reconnect,
# which is slightly inefficient, but this only
matters for the lag
@@ -124,7 +139,10 @@
}
private function getLagTimeCacheKey() {
+ $writerIndex = $this->parent->getWriterIndex();
// Lag is per-server, not per-DB, so key on the master DB name
- return $this->srvCache->makeGlobalKey( 'lag-times',
$this->parent->getServerName( 0 ) );
+ return $this->srvCache->makeGlobalKey(
+ 'lag-times', $this->parent->getServerName( $writerIndex
)
+ );
}
}
--
To view, visit https://gerrit.wikimedia.org/r/264015
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I4ea5db670fe96d20b1d593cc2d759f9c3f570790
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <[email protected]>
Gerrit-Reviewer: Aaron Schulz <[email protected]>
Gerrit-Reviewer: BryanDavis <[email protected]>
Gerrit-Reviewer: Parent5446 <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits