Manybubbles has uploaded a new change for review.
https://gerrit.wikimedia.org/r/81020
Change subject: Update the search index for linked pages.
......................................................................
Update the search index for linked pages.
Also don't blow up if someone redirects to a page that doesn't exist.
Change-Id: Icbf35f5443796085b688996b07703234c9116a0b
---
M CirrusSearch.body.php
M CirrusSearch.php
2 files changed, 60 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch
refs/changes/20/81020/1
diff --git a/CirrusSearch.body.php b/CirrusSearch.body.php
index 4c418e1..110153c 100644
--- a/CirrusSearch.body.php
+++ b/CirrusSearch.body.php
@@ -410,16 +410,20 @@
}
public function update( $id, $title, $text ) {
- $revision = Revision::loadFromPageId( wfGetDB( DB_SLAVE ), $id
);
- $content = $revision->getContent();
if ( in_array( $id, CirrusSearch::$updated ) ) {
// Already indexed $id
return;
}
+ $revision = Revision::loadFromPageId( wfGetDB( DB_SLAVE ), $id
);
+ $content = $revision->getContent();
if ( $content->isRedirect() ) {
$target = $content->getUltimateRedirectTarget();
wfDebugLog( 'CirrusSearch', "Updating search index for
$title which is a redirect to " . $target->getText() );
$targetRevision = Revision::loadFromPageId( wfGetDB(
DB_SLAVE ), $target->getArticleID() );
+ // If you are building a redirect to a non-existent
page then don't error out
+ if ( $targetRevision === null ) {
+ return;
+ }
$newUpdate = new SearchUpdate( $target->getArticleID(),
$target, $targetRevision->getContent() );
$newUpdate->doUpdate();
} else {
@@ -438,10 +442,59 @@
}
/**
- * @param $linkUpdate LinksUpdate
+ * Hooked to update the search index for pages when templates that they
include are changed
+ * and to kick off updating linked articles.
+ * @param $linksUpdate LinksUpdate
*/
- public static function linksUpdateCompletedHook( $linkUpdate ) {
- $title = $linkUpdate->getTitle();
+ public static function linksUpdateCompletedHook( $linksUpdate ) {
+ self::updateFromTitle( $linksUpdate->getTitle() );
+ self::updateLinkedArticles( $linksUpdate );
+ }
+
+ /**
+ * Update the search index for articles linked from this article.
+ * @param $linksUpdate LinksUpdate
+ */
+ private static function updateLinkedArticles( $linksUpdate ) {
+ // This could be made more efficient by having LinksUpdate
return a list of articles that
+ // have been newly linked or newly unlinked. Those are the
only articles that we need
+ // to reindex anyway.
+
+ // This could also be made more efficient by only updating the
link counts rather than
+ // reindexing the whole article.
+ global $wgCirrusSearchLinkedArticlesToUpdate;
+
+ // Build a big list of candidate pages whose links we should
update
+ $candidates = array();
+ foreach ( $linksUpdate->getParserOutput()->getLinks() as $ns =>
$ids ) {
+ foreach ( $ids as $id ) {
+ $candidates[] = $id;
+ }
+ }
+
+ // Pick up to $wgCirrusSearchLinkedArticlesToUpdate links to
update
+ $chosenCount = min( count( $candidates ),
$wgCirrusSearchLinkedArticlesToUpdate );
+ if ( $chosenCount < 1 ) {
+ return;
+ }
+ $chosen = array_rand( $candidates, $chosenCount );
+ // If $chosenCount === 1 then array_rand will return
a key rather than an
+ // array of keys, so just wrap the key and move on with the rest
of the request.
+ if ( !is_array( $chosen ) ) {
+ $chosen = array( $chosen );
+ }
+ foreach ( $chosen as $key ) {
+ $title = Title::newFromID( $candidates[ $key ] );
+ // Skip links to non-existent pages.
+ if ( $title === null ) {
+ continue;
+ }
+ wfDebugLog( 'CirrusSearch', "Updating $title because it
was linked." );
+ self::updateFromTitle( $title );
+ }
+ }
+
+ private static function updateFromTitle( $title ) {
$articleId = $title->getArticleID();
$revision = Revision::loadFromPageId( wfGetDB( DB_SLAVE ),
$articleId );
diff --git a/CirrusSearch.php b/CirrusSearch.php
index b863e2a..32e5b8f 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -59,6 +59,8 @@
// Maximum number of redirects per target page to index.
$wgCirrusSearchIndexedRedirects = 1024;
+// Maximum number of linked articles to update every time an article changes.
+$wgCirrusSearchLinkedArticlesToUpdate = 5;
$dir = __DIR__ . '/';
--
To view, visit https://gerrit.wikimedia.org/r/81020
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Icbf35f5443796085b688996b07703234c9116a0b
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits