[MediaWiki-commits] [Gerrit] Checker: properly check if a page is a redirect - change (mediawiki...CirrusSearch)

2016-06-16 Thread jenkins-bot (Code Review)
jenkins-bot has submitted this change and it was merged.

Change subject: Checker: properly check if a page is a redirect
..


Checker: properly check if a page is a redirect

The Checker class only checked WikiPage::isRedirect() to determine if a page
is a redirect. Unfortunately this value can be inconsistent and is not the
one used by Update jobs.
Checker will now load and parse the page content so that it's consistent with
what the update jobs will do.
Added a --fastCheck option to the maint script to use the old behavior.

Change-Id: Iaf2d5d3c4272d21069da7947fcf4108c8e6d104e
---
M includes/Sanity/Checker.php
M maintenance/saneitize.php
2 files changed, 42 insertions(+), 6 deletions(-)

Approvals:
  Smalyshev: Looks good to me, but someone else must approve
  Cindy-the-browser-test-bot: Looks good to me, but someone else must approve
  EBernhardson: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/includes/Sanity/Checker.php b/includes/Sanity/Checker.php
index ab99cb1..ea5cf0e 100644
--- a/includes/Sanity/Checker.php
+++ b/includes/Sanity/Checker.php
@@ -50,6 +50,12 @@
private $logSane;
 
/**
+* @var bool inspect WikiPage::isRedirect() instead of 
WikiPage::getContent()->isRedirect()
+* Faster since it does not need to fetch the content but inconsistent 
in some cases.
+*/
+   private $fastRedirectCheck;
+
+   /**
 * Build the checker.
 * @param Connection $connection
 * @param Remediator $remediator the remediator to which to send titles
@@ -57,11 +63,12 @@
 * @param Searcher $searcher searcher to use for fetches
 * @param bool $logSane should we log sane ids
 */
-   public function __construct( Connection $connection, Remediator 
$remediator, Searcher $searcher, $logSane ) {
+   public function __construct( Connection $connection, Remediator 
$remediator, Searcher $searcher, $logSane, $fastRedirectCheck ) {
$this->connection = $connection;
$this->remediator = $remediator;
$this->searcher = $searcher;
$this->logSane = $logSane;
+   $this->fastRedirectCheck = $fastRedirectCheck;
}
 
/**
@@ -111,7 +118,7 @@
 */
private function checkExisitingPage( $pageId, $page, $fromIndex ) {
$inIndex = count( $fromIndex ) > 0;
-   if ( $page->isRedirect() ) {
+   if ( $this->checkIfRedirect( $page ) ) {
if ( $inIndex ) {
$this->remediator->redirectInIndex( $page );
return true;
@@ -124,6 +131,26 @@
}
$this->remediator->pageNotInIndex( $page );
return true;
+   }
+
+   /**
+* Check if the page is a redirect
+* @param WikiPage $page the page
+* @return bool true if $page is a redirect
+*/
+   private function checkIfRedirect( $page ) {
+   if ( $this->fastRedirectCheck ) {
+   return $page->isRedirect();
+   }
+
+   $content = $page->getContent();
+   if ( $content == null ) {
+   return false;
+   }
+   if( is_object ( $content ) ) {
+   return $content->isRedirect();
+   }
+   return false;
}
 
/**
@@ -197,7 +224,7 @@
__METHOD__
);
foreach ( $res as $row ) {
-   $page = WikiPage::newFromRow( $row, $dbr );
+   $page = WikiPage::newFromRow( $row );
$pages[$page->getId()] = $page;
}
return $pages;
@@ -213,7 +240,7 @@
if ( !$status->isOK() ) {
throw new \Exception( 'Cannot fetch ids from index' );
}
-   /** @var \Elastica\ResultSet $indexInfo */
+   /** @var \Elastica\ResultSet $dataFromIndex */
$dataFromIndex = $status->getValue();
 
$indexedPages = array();
diff --git a/maintenance/saneitize.php b/maintenance/saneitize.php
index e8a20e7..9d51aa2 100644
--- a/maintenance/saneitize.php
+++ b/maintenance/saneitize.php
@@ -46,6 +46,11 @@
private $toId;
 
/**
+* @var bool true to enable fast but inconsistent redirect checks
+*/
+   private $fastCheck;
+
+   /**
 * @var Checker Checks is the index is insane, and calls on a Remediator
 *  instance to do something about it. The remediator may fix the issue,
 *  log about it, or do a combination.
@@ -60,6 +65,7 @@
$this->addOption( 'toId', 'Stop sanitizing at a specific 
page_id.  Default to the maximum id in the db + 100.', false, true );

[MediaWiki-commits] [Gerrit] Checker: properly check if a page is a redirect - change (mediawiki...CirrusSearch)

2016-06-16 Thread DCausse (Code Review)
DCausse has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/294725

Change subject: Checker: properly check if a page is a redirect
..

Checker: properly check if a page is a redirect

The Checker class only checked WikiPage::isRedirect() to determine if a page
is a redirect. Unfortunately this value can be inconsistent and is not the
one used by Update jobs.
Checker will now load and parse the page content so that it's consistent with
what the update jobs will do.
Added a --fastCheck option to the maint script to use the old behavior.

Change-Id: Iaf2d5d3c4272d21069da7947fcf4108c8e6d104e
---
M includes/Sanity/Checker.php
M maintenance/saneitize.php
2 files changed, 42 insertions(+), 6 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/25/294725/1

diff --git a/includes/Sanity/Checker.php b/includes/Sanity/Checker.php
index ab99cb1..ea5cf0e 100644
--- a/includes/Sanity/Checker.php
+++ b/includes/Sanity/Checker.php
@@ -50,6 +50,12 @@
private $logSane;
 
/**
+* @var bool inspect WikiPage::isRedirect() instead of 
WikiPage::getContent()->isRedirect()
+* Faster since it does not need to fetch the content but inconsistent 
in some cases.
+*/
+   private $fastRedirectCheck;
+
+   /**
 * Build the checker.
 * @param Connection $connection
 * @param Remediator $remediator the remediator to which to send titles
@@ -57,11 +63,12 @@
 * @param Searcher $searcher searcher to use for fetches
 * @param bool $logSane should we log sane ids
 */
-   public function __construct( Connection $connection, Remediator 
$remediator, Searcher $searcher, $logSane ) {
+   public function __construct( Connection $connection, Remediator 
$remediator, Searcher $searcher, $logSane, $fastRedirectCheck ) {
$this->connection = $connection;
$this->remediator = $remediator;
$this->searcher = $searcher;
$this->logSane = $logSane;
+   $this->fastRedirectCheck = $fastRedirectCheck;
}
 
/**
@@ -111,7 +118,7 @@
 */
private function checkExisitingPage( $pageId, $page, $fromIndex ) {
$inIndex = count( $fromIndex ) > 0;
-   if ( $page->isRedirect() ) {
+   if ( $this->checkIfRedirect( $page ) ) {
if ( $inIndex ) {
$this->remediator->redirectInIndex( $page );
return true;
@@ -124,6 +131,26 @@
}
$this->remediator->pageNotInIndex( $page );
return true;
+   }
+
+   /**
+* Check if the page is a redirect
+* @param WikiPage $page the page
+* @return bool true if $page is a redirect
+*/
+   private function checkIfRedirect( $page ) {
+   if ( $this->fastRedirectCheck ) {
+   return $page->isRedirect();
+   }
+
+   $content = $page->getContent();
+   if ( $content == null ) {
+   return false;
+   }
+   if( is_object ( $content ) ) {
+   return $content->isRedirect();
+   }
+   return false;
}
 
/**
@@ -197,7 +224,7 @@
__METHOD__
);
foreach ( $res as $row ) {
-   $page = WikiPage::newFromRow( $row, $dbr );
+   $page = WikiPage::newFromRow( $row );
$pages[$page->getId()] = $page;
}
return $pages;
@@ -213,7 +240,7 @@
if ( !$status->isOK() ) {
throw new \Exception( 'Cannot fetch ids from index' );
}
-   /** @var \Elastica\ResultSet $indexInfo */
+   /** @var \Elastica\ResultSet $dataFromIndex */
$dataFromIndex = $status->getValue();
 
$indexedPages = array();
diff --git a/maintenance/saneitize.php b/maintenance/saneitize.php
index e8a20e7..9d51aa2 100644
--- a/maintenance/saneitize.php
+++ b/maintenance/saneitize.php
@@ -46,6 +46,11 @@
private $toId;
 
/**
+* @var bool true to enable fast but inconsistent redirect checks
+*/
+   private $fastCheck;
+
+   /**
 * @var Checker Checks is the index is insane, and calls on a Remediator
 *  instance to do something about it. The remediator may fix the issue,
 *  log about it, or do a combination.
@@ -60,6 +65,7 @@
$this->addOption( 'toId', 'Stop sanitizing at a specific 
page_id.  Default to the maximum id in the db + 100.', false, true );
$this->addOption( 'noop', 'Rather then queue remediation 
actions do nothing.' );