[MediaWiki-commits] [Gerrit] Checker: properly check if a page is a redirect - change (mediawiki...CirrusSearch)
jenkins-bot has submitted this change and it was merged. Change subject: Checker: properly check if a page is a redirect .. Checker: properly check if a page is a redirect The Checker class only checked WikiPage::isRedirect() to determine if a page is a redirect. Unfortunately this value can be inconsistent and is not the one used by Update jobs. Checker will now load and parse the page content so that it's consistent with what the update jobs will do. Added a --fastCheck option to the maint script to use the old behavior. Change-Id: Iaf2d5d3c4272d21069da7947fcf4108c8e6d104e --- M includes/Sanity/Checker.php M maintenance/saneitize.php 2 files changed, 42 insertions(+), 6 deletions(-) Approvals: Smalyshev: Looks good to me, but someone else must approve Cindy-the-browser-test-bot: Looks good to me, but someone else must approve EBernhardson: Looks good to me, approved jenkins-bot: Verified diff --git a/includes/Sanity/Checker.php b/includes/Sanity/Checker.php index ab99cb1..ea5cf0e 100644 --- a/includes/Sanity/Checker.php +++ b/includes/Sanity/Checker.php @@ -50,6 +50,12 @@ private $logSane; /** +* @var bool inspect WikiPage::isRedirect() instead of WikiPage::getContent()->isRedirect() +* Faster since it does not need to fetch the content but inconsistent in some cases. +*/ + private $fastRedirectCheck; + + /** * Build the checker. * @param Connection $connection * @param Remediator $remediator the remediator to which to send titles @@ -57,11 +63,12 @@ * @param Searcher $searcher searcher to use for fetches * @param bool $logSane should we log sane ids */ - public function __construct( Connection $connection, Remediator $remediator, Searcher $searcher, $logSane ) { + public function __construct( Connection $connection, Remediator $remediator, Searcher $searcher, $logSane, $fastRedirectCheck ) { $this->connection = $connection; $this->remediator = $remediator; $this->searcher = $searcher; $this->logSane = $logSane; + $this->fastRedirectCheck = $fastRedirectCheck; } /** @@ -111,7 +118,7 @@ */ private function checkExisitingPage( $pageId, $page, $fromIndex ) { $inIndex = count( $fromIndex ) > 0; - if ( $page->isRedirect() ) { + if ( $this->checkIfRedirect( $page ) ) { if ( $inIndex ) { $this->remediator->redirectInIndex( $page ); return true; @@ -124,6 +131,26 @@ } $this->remediator->pageNotInIndex( $page ); return true; + } + + /** +* Check if the page is a redirect +* @param WikiPage $page the page +* @return bool true if $page is a redirect +*/ + private function checkIfRedirect( $page ) { + if ( $this->fastRedirectCheck ) { + return $page->isRedirect(); + } + + $content = $page->getContent(); + if ( $content == null ) { + return false; + } + if( is_object ( $content ) ) { + return $content->isRedirect(); + } + return false; } /** @@ -197,7 +224,7 @@ __METHOD__ ); foreach ( $res as $row ) { - $page = WikiPage::newFromRow( $row, $dbr ); + $page = WikiPage::newFromRow( $row ); $pages[$page->getId()] = $page; } return $pages; @@ -213,7 +240,7 @@ if ( !$status->isOK() ) { throw new \Exception( 'Cannot fetch ids from index' ); } - /** @var \Elastica\ResultSet $indexInfo */ + /** @var \Elastica\ResultSet $dataFromIndex */ $dataFromIndex = $status->getValue(); $indexedPages = array(); diff --git a/maintenance/saneitize.php b/maintenance/saneitize.php index e8a20e7..9d51aa2 100644 --- a/maintenance/saneitize.php +++ b/maintenance/saneitize.php @@ -46,6 +46,11 @@ private $toId; /** +* @var bool true to enable fast but inconsistent redirect checks +*/ + private $fastCheck; + + /** * @var Checker Checks is the index is insane, and calls on a Remediator * instance to do something about it. The remediator may fix the issue, * log about it, or do a combination. @@ -60,6 +65,7 @@ $this->addOption( 'toId', 'Stop sanitizing at a specific page_id. Default to the maximum id in the db + 100.', false, true );
[MediaWiki-commits] [Gerrit] Checker: properly check if a page is a redirect - change (mediawiki...CirrusSearch)
DCausse has uploaded a new change for review. https://gerrit.wikimedia.org/r/294725 Change subject: Checker: properly check if a page is a redirect .. Checker: properly check if a page is a redirect The Checker class only checked WikiPage::isRedirect() to determine if a page is a redirect. Unfortunately this value can be inconsistent and is not the one used by Update jobs. Checker will now load and parse the page content so that it's consistent with what the update jobs will do. Added a --fastCheck option to the maint script to use the old behavior. Change-Id: Iaf2d5d3c4272d21069da7947fcf4108c8e6d104e --- M includes/Sanity/Checker.php M maintenance/saneitize.php 2 files changed, 42 insertions(+), 6 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/25/294725/1 diff --git a/includes/Sanity/Checker.php b/includes/Sanity/Checker.php index ab99cb1..ea5cf0e 100644 --- a/includes/Sanity/Checker.php +++ b/includes/Sanity/Checker.php @@ -50,6 +50,12 @@ private $logSane; /** +* @var bool inspect WikiPage::isRedirect() instead of WikiPage::getContent()->isRedirect() +* Faster since it does not need to fetch the content but inconsistent in some cases. +*/ + private $fastRedirectCheck; + + /** * Build the checker. * @param Connection $connection * @param Remediator $remediator the remediator to which to send titles @@ -57,11 +63,12 @@ * @param Searcher $searcher searcher to use for fetches * @param bool $logSane should we log sane ids */ - public function __construct( Connection $connection, Remediator $remediator, Searcher $searcher, $logSane ) { + public function __construct( Connection $connection, Remediator $remediator, Searcher $searcher, $logSane, $fastRedirectCheck ) { $this->connection = $connection; $this->remediator = $remediator; $this->searcher = $searcher; $this->logSane = $logSane; + $this->fastRedirectCheck = $fastRedirectCheck; } /** @@ -111,7 +118,7 @@ */ private function checkExisitingPage( $pageId, $page, $fromIndex ) { $inIndex = count( $fromIndex ) > 0; - if ( $page->isRedirect() ) { + if ( $this->checkIfRedirect( $page ) ) { if ( $inIndex ) { $this->remediator->redirectInIndex( $page ); return true; @@ -124,6 +131,26 @@ } $this->remediator->pageNotInIndex( $page ); return true; + } + + /** +* Check if the page is a redirect +* @param WikiPage $page the page +* @return bool true if $page is a redirect +*/ + private function checkIfRedirect( $page ) { + if ( $this->fastRedirectCheck ) { + return $page->isRedirect(); + } + + $content = $page->getContent(); + if ( $content == null ) { + return false; + } + if( is_object ( $content ) ) { + return $content->isRedirect(); + } + return false; } /** @@ -197,7 +224,7 @@ __METHOD__ ); foreach ( $res as $row ) { - $page = WikiPage::newFromRow( $row, $dbr ); + $page = WikiPage::newFromRow( $row ); $pages[$page->getId()] = $page; } return $pages; @@ -213,7 +240,7 @@ if ( !$status->isOK() ) { throw new \Exception( 'Cannot fetch ids from index' ); } - /** @var \Elastica\ResultSet $indexInfo */ + /** @var \Elastica\ResultSet $dataFromIndex */ $dataFromIndex = $status->getValue(); $indexedPages = array(); diff --git a/maintenance/saneitize.php b/maintenance/saneitize.php index e8a20e7..9d51aa2 100644 --- a/maintenance/saneitize.php +++ b/maintenance/saneitize.php @@ -46,6 +46,11 @@ private $toId; /** +* @var bool true to enable fast but inconsistent redirect checks +*/ + private $fastCheck; + + /** * @var Checker Checks is the index is insane, and calls on a Remediator * instance to do something about it. The remediator may fix the issue, * log about it, or do a combination. @@ -60,6 +65,7 @@ $this->addOption( 'toId', 'Stop sanitizing at a specific page_id. Default to the maximum id in the db + 100.', false, true ); $this->addOption( 'noop', 'Rather then queue remediation actions do nothing.' );