jenkins-bot has submitted this change and it was merged.
Change subject: Allow reindex count to differ from old count
......................................................................
Allow reindex count to differ from old count
This is required because we reindex while handling live traffic. After
each in-place reindex we should run forceSearchIndex over the time span
of the reindex to catch up on updates made in the meantime. I've added
documentation to that effect.
Change-Id: I55b3f21d6ddff8cdd7325d00cb67d25487d6ecb4
---
M README
M maintenance/updateOneSearchIndexConfig.php
2 files changed, 40 insertions(+), 9 deletions(-)
Approvals:
Chad: Looks good to me, approved
jenkins-bot: Verified
diff --git a/README b/README
index af1a245..8411c4f 100644
--- a/README
+++ b/README
@@ -107,11 +107,18 @@
only brings down search for a few seconds _but_ it might be incompatible with
some updates. Safest to
never use it unless you know for sure:
php updateSearchIndexConfig.php --closeOk
- php forceSearchIndex
+ php forceSearchIndex.php
-If you must do (2) readlly have only one option:
+If you must do (2) you really have only one option:
A. Build a copy of the index and reindex to it:
php updateSearchIndexConfig.php --reindexAndRemoveOk --indexIdentifier now
+ php forceSearchIndex.php --from <time when you started
updateSearchIndexConfig.php in YYYY-mm-ddTHH:mm:ssZ> --deletes
+ php forceSearchIndex.php --from <time when you started
updateSearchIndexConfig.php in YYYY-mm-ddTHH:mm:ssZ>
+or for the Bash inclined:
+ export REINDEX_START=$(TZ=UTC date +%Y-%m-%dT%H:%M:%SZ)
+ php updateSearchIndexConfig.php --reindexAndRemoveOk --indexIdentifier now
+ php forceSearchIndex.php --from $REINDEX_START --deletes
+ php forceSearchIndex.php --from $REINDEX_START
If you must do (3) you have two options:
A. Same as (2.A)
diff --git a/maintenance/updateOneSearchIndexConfig.php
b/maintenance/updateOneSearchIndexConfig.php
index 74b9739..7f589b7 100644
--- a/maintenance/updateOneSearchIndexConfig.php
+++ b/maintenance/updateOneSearchIndexConfig.php
@@ -45,14 +45,20 @@
private $removeIndecies = false;
/**
- * @var are there too few replicas in the index we're making?
+ * @var boolean are there too few replicas in the index we're making?
*/
private $tooFewReplicas = false;
/**
- * @var number of processes to use when reindexing
+ * @var int number of processes to use when reindexing
*/
private $reindexProcesses;
+
+ /**
+ * @var float how much the reindexed copy of an index is allowed to
deviate from the current
+ * copy without triggering a reindex failure
+ */
+ private $reindexAcceptableCountDeviation;
public function __construct() {
parent::__construct();
@@ -90,6 +96,9 @@
"performed during this operation manually. Defaults to
false." );
$maintenance->addOption( 'reindexProcesses', 'Number of
processess to use in reindex. ' .
'Not supported on Windows. Defaults to 1 on Windows
and 10 otherwise.', false, true );
+ $maintenance->addOption( 'reindexAcceptableCountDeviation',
'How much can the reindexed ' .
+ 'copy of an index is allowed to deviate from the
current copy without triggering a ' .
+ 'reindex failure. Defaults to 5%.', false, true );
}
public function execute() {
@@ -114,6 +123,8 @@
$this->indexIdentifier =
$this->pickIndexIdentifierFromOption( $this->getOption( 'indexIdentifier',
'current' ) );
$this->reindexAndRemoveOk = $this->getOption(
'reindexAndRemoveOk', false );
$this->reindexProcesses = $this->getOption(
'reindexProcesses', wfIsWindows() ? 1 : 10 );
+ $this->reindexAcceptableCountDeviation =
self::parsePotentialPercent(
+ $this->getOption(
'reindexAcceptableCountDeviation', '5%' ) );
$this->validateIndex();
$this->validateAnalyzers();
@@ -469,12 +480,17 @@
}
$this->output( $this->indent . "Verifying counts..." );
- $oldCount = CirrusSearchConnection::getPageType(
$this->indexType )->count();
+ // We can't verify counts are exactly equal because
they won't be - we still push updates into
+ // the old index while reindexing the new one.
+ $oldCount = (float)
CirrusSearchConnection::getPageType( $this->indexType )->count();
$this->getIndex()->refresh();
- $newCount = $this->getPageType()->count();
- if ( $oldCount !== $newCount ) {
- $this->output( "Different! Expected $oldCount
but got $newCount\n" );
- $this->error( "Failed to load index. Expected
$oldCount but got $newCount. Check for warnings above.", 1 );
+ $newCount = (float) $this->getPageType()->count();
+ $difference = $oldCount > 0 ? abs( $oldCount -
$newCount ) / $oldCount : 0;
+ if ( $difference >
$this->reindexAcceptableCountDeviation ) {
+ $this->output( "Not close enough!
old=$oldCount new=$newCount difference=$difference\n" );
+ $this->error( 'Failed to load index - counts
not close enough. ' .
+ "old=$oldCount new=$newCount
difference=$difference. " .
+ 'Check for warnings above.', 1 );
}
$this->output( "done\n" );
} else {
@@ -698,6 +714,14 @@
}
return $result;
}
+
+ private static function parsePotentialPercent( $str ) {
+ $result = floatval( $str );
+ if ( strpos( $str, '%' ) === false ) {
+ return $result;
+ }
+ return $result / 100;
+ }
}
$maintClass = "UpdateOneSearchIndexConfig";
--
To view, visit https://gerrit.wikimedia.org/r/88729
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I55b3f21d6ddff8cdd7325d00cb67d25487d6ecb4
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>
Gerrit-Reviewer: Chad <[email protected]>
Gerrit-Reviewer: Manybubbles <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits