Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/75135


Change subject: Reindex pages on template changes.
......................................................................

Reindex pages on template changes.

This uses the bulk html update hook proposed in
https://gerrit.wikimedia.org/r/#/c/75131/ to schedule search updates for
the same list of page ids.

Change-Id: Ic2f4374d03cffeb005b88cd25dd2f483454653a8
---
M CirrusSearch.body.php
M CirrusSearch.php
A CirrusSearchUpdateJob.php
M README
4 files changed, 104 insertions(+), 18 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/35/75135/1

diff --git a/CirrusSearch.body.php b/CirrusSearch.body.php
index fa42d21..1656fdd 100644
--- a/CirrusSearch.body.php
+++ b/CirrusSearch.body.php
@@ -282,25 +282,25 @@
 
        public function update( $id, $title, $text ) {
                $revision = Revision::loadFromPageId( wfGetDB( DB_SLAVE ), $id 
);
-               $content = $revision->getContent();
-               if ( $content->isRedirect() ) {
-                       $target = $content->getUltimateRedirectTarget();
-                       wfDebugLog( 'CirrusSearch', "Updating search index for 
$title which is a redirect to " . $target->getText() );
-                       $targetRevision = Revision::loadFromPageId( wfGetDB( 
DB_SLAVE ), $target->getArticleID() );
-                       $newUpdate = new SearchUpdate( $target->getArticleID(), 
$target, $targetRevision->getContent() );
-                       $newUpdate->doUpdate();
-               } else {
-                       // Technically this is supposed to be just a title 
update but that is more complicated then
-                       // just rebuilding the text.  It doesn't look like 
these title updates are used frequently
-                       // so we'll just go with the simple implementation here.
-                       if ( $text === null ) {
-                               $text = $this0->getTextFromContent( 
$revision->getTitle(), $content );
-                       }
-                       CirrusSearchUpdater::updateRevisions( array( array(
-                               'rev' => $revision,
-                               'text' => $text
-                       ) ) );
+               CirrusSearchUpdater::updateRevisions( array( 
$this->buildPageData( $revision, $text ) ) );
+       }
+
+       public static function bulkInvalidateHtmlCacheHook( $changedTitle, 
$titles ) {
+               $titleIds = array();
+               foreach ( $titles as $title ) {
+                       $titleIds[] = $title->getArticleID();
                }
+               JobQueueGroup::singleton()->push( new CirrusSearchUpdateJob( 
$changedTitle, $titleIds ) );
+               return true;
+       }
+
+       public function bulkUpdate( $titles ) {
+               $pageData = array();
+               foreach ( $titles as $title ) {
+                       $revision = Revision::loadFromPageId( wfGetDB( DB_SLAVE 
), $title->getArticleID() );
+                       $pageData[] = $this->buildPageData( $revision );
+               }
+               CirrusSearchUpdater::updateRevisions( $pageData );
        }
 
        public function updateTitle( $id, $title ) {
@@ -326,6 +326,26 @@
                }
                return $text;
        }
+
+       private function buildPageData( $revision, $text = null ) {
+               $content = $revision->getContent();
+               if ( $content->isRedirect() ) {
+                       $target = $content->getUltimateRedirectTarget();
+                       $targetRevision = Revision::loadFromPageId( wfGetDB( 
DB_SLAVE ), $target->getArticleID() );
+                       return array(
+                               'rev' => $targetRevision,
+                               'text' => $this->getTextFromContent( 
$revision->getTitle(), $content )
+                       );
+               } else {
+                       if ( $text === null ) {
+                               $text = $this->getTextFromContent( 
$revision->getTitle(), $content );
+                       }
+                       return array(
+                               'rev' => $revision,
+                               'text' => $text
+                       );
+               }
+       }
 }
 
 /**
diff --git a/CirrusSearch.php b/CirrusSearch.php
index 5c8408a..67ad0a8 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -60,6 +60,7 @@
  * Classes
  */
 $wgAutoloadClasses['CirrusSearch'] = $dir . 'CirrusSearch.body.php';
+$wgAutoloadClasses['CirrusSearchUpdateJob'] = $dir . 
'CirrusSearchUpdateJob.php';
 $wgAutoloadClasses['CirrusSearchUpdater'] = $dir . 'CirrusSearchUpdater.php';
 $wgAutoloadClasses['ConfigBuilder'] = $dir . 'config/ConfigBuilder.php';
 $wgAutoloadClasses['SchemaBuilder'] = $dir . 'config/SchemaBuilder.php';
@@ -121,12 +122,21 @@
  * Also check Setup for other hooks.
  */
 $wgHooks['SearchUpdate'][] = function() { return false; };
+/*
+ * Note that we steal a hook for invalidating html here - this is rude but the
+ * invalidation hook does an amazing job chunking the pages.
+ */
+$wgHooks['BulkInvalidateHTMLCache'][] = 
'CirrusSearch::bulkInvalidateHtmlCacheHook';
 
 /**
  * i18n
  */
 $wgExtensionMessagesFiles['CirrusSearch'] = $dir . 'CirrusSearch.i18n.php';
 
+/**
+ * Jobs
+ */
+$wgJobClasses['cirrusSearchUpdateJob'] = 'CirrusSearchUpdateJob';
 
 /**
  * Setup
diff --git a/CirrusSearchUpdateJob.php b/CirrusSearchUpdateJob.php
new file mode 100644
index 0000000..4433e42
--- /dev/null
+++ b/CirrusSearchUpdateJob.php
@@ -0,0 +1,55 @@
+<?php
+/**
+ * Search update job.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup JobQueue
+ */
+
+/**
+ * Job wrapper around CirrusSearch's bulkUpdate method.  Gets scheduled 
whenever
+ * a page is cleared from the html cache.
+ *
+ * @ingroup JobQueue
+ */
+class CirrusSearchUpdateJob extends Job {
+       /**
+        * Construct this job.
+        * @param $title Title: title whose change requires $titleIds to be
+        *          reindexed
+        * @param $titleIds array: title ids to reindex
+        * @param $id Integer: job id
+        */
+       function __construct( $title, $titleIds, $id = 0 ) {
+               $params = array(
+                       'titleIds' => $titleIds
+               );
+               parent::__construct( 'cirrusSearchUpdateJob', $title, $params, 
$id );
+       }
+
+       public function run() {
+               $titleIds = $this->getParams();
+               $titleIds = $titleIds[ 'titleIds' ];
+               $titles = array();
+               foreach ( $titleIds as $titleId ) {
+                       $titles[] = Title::newFromId( $titleId );
+               }
+               $search = new CirrusSearch();
+               $search->bulkUpdate( $titles );
+       }
+}
diff --git a/README b/README
index 9d938a8..ff29671 100644
--- a/README
+++ b/README
@@ -14,6 +14,7 @@
  $wgCirrusSearchServers = array( 'elasticsearch0', 'elasticsearch1', 
'elasticsearch2', 'elasticsearch3' );
 There are other $wgCirrusSearch variables that you might want to change from 
their defaults.
 If you want to change them then set their new values with 
$wgCirrusSearchServers in LocalSettings.php.
+If you use Redis for the JobQueue, make sure CirrusSearchUpdateJob jobs are routed to it as well.
 
 Now run this script to generate your elasticsearch index:
  php maintenance/updateSearchConfig.php

-- 
To view, visit https://gerrit.wikimedia.org/r/75135
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic2f4374d03cffeb005b88cd25dd2f483454653a8
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to