Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/75135
Change subject: Reindex pages on template changes. ...................................................................... Reindex pages on template changes. This uses the bulk html update hook proposed in https://gerrit.wikimedia.org/r/#/c/75131/ to schedule search updates to the same list of ids. Change-Id: Ic2f4374d03cffeb005b88cd25dd2f483454653a8 --- M CirrusSearch.body.php M CirrusSearch.php A CirrusSearchUpdateJob.php M README 4 files changed, 104 insertions(+), 18 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/35/75135/1 diff --git a/CirrusSearch.body.php b/CirrusSearch.body.php index fa42d21..1656fdd 100644 --- a/CirrusSearch.body.php +++ b/CirrusSearch.body.php @@ -282,25 +282,25 @@ public function update( $id, $title, $text ) { $revision = Revision::loadFromPageId( wfGetDB( DB_SLAVE ), $id ); - $content = $revision->getContent(); - if ( $content->isRedirect() ) { - $target = $content->getUltimateRedirectTarget(); - wfDebugLog( 'CirrusSearch', "Updating search index for $title which is a redirect to " . $target->getText() ); - $targetRevision = Revision::loadFromPageId( wfGetDB( DB_SLAVE ), $target->getArticleID() ); - $newUpdate = new SearchUpdate( $target->getArticleID(), $target, $targetRevision->getContent() ); - $newUpdate->doUpdate(); - } else { - // Technically this is supposed to be just a title update but that is more complicated then - // just rebuilding the text. It doesn't look like these title updates are used frequently - // so we'll just go with the simple implementation here. 
- if ( $text === null ) { - $text = $this->getTextFromContent( $revision->getTitle(), $content ); - } - CirrusSearchUpdater::updateRevisions( array( array( - 'rev' => $revision, - 'text' => $text - ) ) ); + CirrusSearchUpdater::updateRevisions( array( $this->buildPageData( $revision, $text ) ) ); + } + + public static function bulkInvalidateHtmlCacheHook( $changedTitle, $titles ) { + $titleIds = array(); + foreach ( $titles as $title ) { + $titleIds[] = $title->getArticleID(); } + JobQueueGroup::singleton()->push( new CirrusSearchUpdateJob( $changedTitle, $titleIds ) ); + return true; + } + + public function bulkUpdate( $titles ) { + $pageData = array(); + foreach ( $titles as $title ) { + $revision = Revision::loadFromPageId( wfGetDB( DB_SLAVE ), $title->getArticleID() ); + $pageData[] = $this->buildPageData( $revision ); + } + CirrusSearchUpdater::updateRevisions( $pageData ); } public function updateTitle( $id, $title ) { @@ -326,6 +326,26 @@ } return $text; } + + private function buildPageData( $revision, $text = null ) { + $content = $revision->getContent(); + if ( $content->isRedirect() ) { + $target = $content->getUltimateRedirectTarget(); + $targetRevision = Revision::loadFromPageId( wfGetDB( DB_SLAVE ), $target->getArticleID() ); + return array( + 'rev' => $targetRevision, + 'text' => $this->getTextFromContent( $revision->getTitle(), $content ) + ); + } else { + if ( $text === null ) { + $text = $this->getTextFromContent( $revision->getTitle(), $content ); + } + return array( + 'rev' => $revision, + 'text' => $text + ); + } + } } /** diff --git a/CirrusSearch.php b/CirrusSearch.php index 5c8408a..67ad0a8 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -60,6 +60,7 @@ * Classes */ $wgAutoloadClasses['CirrusSearch'] = $dir . 'CirrusSearch.body.php'; +$wgAutoloadClasses['CirrusSearchUpdateJob'] = $dir . 'CirrusSearchUpdateJob.php'; $wgAutoloadClasses['CirrusSearchUpdater'] = $dir . 
'CirrusSearchUpdater.php'; $wgAutoloadClasses['ConfigBuilder'] = $dir . 'config/ConfigBuilder.php'; $wgAutoloadClasses['SchemaBuilder'] = $dir . 'config/SchemaBuilder.php'; @@ -121,12 +122,21 @@ * Also check Setup for other hooks. */ $wgHooks['SearchUpdate'][] = function() { return false; }; +/* + * Note that we steal a hook for invalidating html here - this is rude but the + * invalidation hook does an amazing job chunking the pages. + */ +$wgHooks['BulkInvalidateHTMLCache'][] = 'CirrusSearch::bulkInvalidateHtmlCacheHook'; /** * i18n */ $wgExtensionMessagesFiles['CirrusSearch'] = $dir . 'CirrusSearch.i18n.php'; +/** + * Jobs + */ +$wgJobClasses['cirrusSearchUpdateJob'] = 'CirrusSearchUpdateJob'; /** * Setup diff --git a/CirrusSearchUpdateJob.php b/CirrusSearchUpdateJob.php new file mode 100644 index 0000000..4433e42 --- /dev/null +++ b/CirrusSearchUpdateJob.php @@ -0,0 +1,55 @@ +<?php +/** + * Search update job. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Cache + */ + +/** + * Job wrapper around CirrusSearch's bulkUpdate method. Gets scheduled whenever + * a page is cleared from the html cache. + * + * @ingroup JobQueue + */ +class CirrusSearchUpdateJob extends Job { + /** + * Construct this job. 
+ * @param $title Title: title that changed which required $titleIds to be + * reindexed + * @param $titleIds array: title ids to reindex + * @param $id Integer: job id + */ + function __construct( $title, $titleIds, $id = 0 ) { + $params = array( + 'titleIds' => $titleIds + ); + parent::__construct( 'cirrusSearchUpdateJob', $title, $params, $id ); + } + + public function run() { + $titleIds = $this->getParams(); + $titleIds = $titleIds[ 'titleIds' ]; + $titles = array(); + foreach ( $titleIds as $titleId ) { + $titles[] = Title::newFromId( $titleId ); + } + $search = new CirrusSearch(); + $search->bulkUpdate( $titles ); + } +} diff --git a/README b/README index 9d938a8..ff29671 100644 --- a/README +++ b/README @@ -14,6 +14,7 @@ $wgCirrusSearchServers = array( 'elasticsearch0', 'elasticsearch1', 'elasticsearch2', 'elasticsearch3' ); There are other $wgCirrusSearch variables that you might want to change from their defaults. If you want to change them then set their new values with $wgCirrusSearchServers in LocalSettings.php. +If you use Redis for the JobQueue you should make sure the CirrusSearchUpdateJob will go there. Now run this script to generate your elasticsearch index: php maintenance/updateSearchConfig.php -- To view, visit https://gerrit.wikimedia.org/r/75135 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ic2f4374d03cffeb005b88cd25dd2f483454653a8 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Manybubbles <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
