jenkins-bot has submitted this change and it was merged. Change subject: Infrastructure for augmenting search results ......................................................................
Infrastructure for augmenting search results Bug: T117493 Change-Id: Ia5413a7846cc961026a2dc3542b619493bc76a23 --- M autoload.php M docs/hooks.txt A includes/search/AugmentPageProps.php A includes/search/PerRowAugmentor.php A includes/search/ResultAugmentor.php A includes/search/ResultSetAugmentor.php M includes/search/SearchEngine.php M includes/search/SearchNearMatchResultSet.php M includes/search/SearchResult.php M includes/search/SearchResultSet.php M includes/search/SqlSearchResultSet.php M includes/specials/SpecialSearch.php M tests/phpunit/includes/search/SearchEngineTest.php 13 files changed, 303 insertions(+), 10 deletions(-) Approvals: EBernhardson: Looks good to me, approved jenkins-bot: Verified diff --git a/autoload.php b/autoload.php index 96c8190..9fd83eb 100644 --- a/autoload.php +++ b/autoload.php @@ -153,6 +153,7 @@ 'AtomFeed' => __DIR__ . '/includes/Feed.php', 'AtomicSectionUpdate' => __DIR__ . '/includes/deferred/AtomicSectionUpdate.php', 'AttachLatest' => __DIR__ . '/maintenance/attachLatest.php', + 'AugmentPageProps' => __DIR__ . '/includes/search/AugmentPageProps.php', 'AuthManagerSpecialPage' => __DIR__ . '/includes/specialpage/AuthManagerSpecialPage.php', 'AuthPlugin' => __DIR__ . '/includes/AuthPlugin.php', 'AuthPluginUser' => __DIR__ . '/includes/AuthPlugin.php', @@ -1043,6 +1044,7 @@ 'PatrolLog' => __DIR__ . '/includes/logging/PatrolLog.php', 'PatrolLogFormatter' => __DIR__ . '/includes/logging/PatrolLogFormatter.php', 'Pbkdf2Password' => __DIR__ . '/includes/password/Pbkdf2Password.php', + 'PerRowAugmentor' => __DIR__ . '/includes/search/PerRowAugmentor.php', 'PermissionsError' => __DIR__ . '/includes/exception/PermissionsError.php', 'PhpHttpRequest' => __DIR__ . '/includes/HttpFunctions.php', 'PhpXmlBugTester' => __DIR__ . '/includes/installer/PhpBugTests.php', @@ -1182,6 +1184,8 @@ 'ResourceLoaderUserTokensModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderUserTokensModule.php', 'ResourceLoaderWikiModule' => __DIR__ . '/includes/resourceloader/ResourceLoaderWikiModule.php', 'RestbaseVirtualRESTService' => __DIR__ . '/includes/libs/virtualrest/RestbaseVirtualRESTService.php', + 'ResultAugmentor' => __DIR__ . '/includes/search/ResultAugmentor.php', + 'ResultSetAugmentor' => __DIR__ . '/includes/search/ResultSetAugmentor.php', 'ResultWrapper' => __DIR__ . '/includes/libs/rdbms/database/resultwrapper/ResultWrapper.php', 'RevDelArchiveItem' => __DIR__ . '/includes/revisiondelete/RevDelArchiveItem.php', 'RevDelArchiveList' => __DIR__ . '/includes/revisiondelete/RevDelArchiveList.php', diff --git a/docs/hooks.txt b/docs/hooks.txt index a7fb873..ae0770b 100644 --- a/docs/hooks.txt +++ b/docs/hooks.txt @@ -2699,6 +2699,13 @@ $output: ParserOutput that is produced from the page $engine: SearchEngine for which the indexing is intended +'SearchResultsAugment': Allows extension to add its code to the list of search +result augmentors. +&$setAugmentors: List of whole-set augmentor objects, must implement ResultSetAugmentor +&$rowAugmentors: List of per-row augmentor objects, must implement ResultAugmentor. +Note that lists should be in the format name => object and the names in both lists should +be distinct. + 'SecondaryDataUpdates': Allows modification of the list of DataUpdates to perform when page content is modified. Currently called by AbstractContent::getSecondaryDataUpdates. diff --git a/includes/search/AugmentPageProps.php b/includes/search/AugmentPageProps.php new file mode 100644 index 0000000..29bd463 --- /dev/null +++ b/includes/search/AugmentPageProps.php @@ -0,0 +1,20 @@ +<?php + +/** + * Augment search result set with values of certain page props. + */ +class AugmentPageProps implements ResultSetAugmentor { + /** + * @var array List of properties. + */ + private $propnames; + + public function __construct( $propnames ) { + $this->propnames = $propnames; + } + + public function augmentAll( SearchResultSet $resultSet ) { + $titles = $resultSet->extractTitles(); + return PageProps::getInstance()->getProperties( $titles, $this->propnames ); + } +} diff --git a/includes/search/PerRowAugmentor.php b/includes/search/PerRowAugmentor.php new file mode 100644 index 0000000..8eb8b17 --- /dev/null +++ b/includes/search/PerRowAugmentor.php @@ -0,0 +1,38 @@ +<?php + +/** + * Perform augmentation of each row and return composite result, + * indexed by ID. + */ +class PerRowAugmentor implements ResultSetAugmentor { + + /** + * @var ResultAugmentor + */ + private $rowAugmentor; + + /** + * PerRowAugmentor constructor. + * @param ResultAugmentor $augmentor Per-result augmentor to use. + */ + public function __construct( ResultAugmentor $augmentor ) { + $this->rowAugmentor = $augmentor; + } + + /** + * Produce data to augment search result set. + * @param SearchResultSet $resultSet + * @return array Data for all results + */ + public function augmentAll( SearchResultSet $resultSet ) { + $data = []; + foreach ( $resultSet->extractResults() as $result ) { + $id = $result->getTitle()->getArticleID(); + if ( !$id ) { + continue; + } + $data[$id] = $this->rowAugmentor->augment( $result ); + } + return $data; + } +} diff --git a/includes/search/ResultAugmentor.php b/includes/search/ResultAugmentor.php new file mode 100644 index 0000000..350b780 --- /dev/null +++ b/includes/search/ResultAugmentor.php @@ -0,0 +1,14 @@ +<?php + +/** + * Augment search results. + * + */ +interface ResultAugmentor { + /** + * Produce data to augment search result set. + * @param SearchResult $result + * @return mixed Data for this result + */ + public function augment( SearchResult $result ); +} diff --git a/includes/search/ResultSetAugmentor.php b/includes/search/ResultSetAugmentor.php new file mode 100644 index 0000000..94710a8 --- /dev/null +++ b/includes/search/ResultSetAugmentor.php @@ -0,0 +1,14 @@ +<?php + +/** + * Augment search results. + * + */ +interface ResultSetAugmentor { + /** + * Produce data to augment search result set. + * @param SearchResultSet $resultSet + * @return array Data for all results + */ + public function augmentAll( SearchResultSet $resultSet ); +} diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php index c2ccca0a..1eba141 100644 --- a/includes/search/SearchEngine.php +++ b/includes/search/SearchEngine.php @@ -695,6 +695,37 @@ Hooks::run( 'SearchIndexFields', [ &$fields, $this ] ); return $fields; } + + /** + * Augment search results with extra data. + * + * @param SearchResultSet $resultSet + */ + public function augmentSearchResults( SearchResultSet $resultSet ) { + $setAugmentors = []; + $rowAugmentors = []; + Hooks::run( "SearchResultsAugment", [ &$setAugmentors, &$rowAugmentors ] ); + + if ( !$setAugmentors && !$rowAugmentors ) { + // We're done here + return; + } + + // Convert row augmentors to set augmentor + foreach ( $rowAugmentors as $name => $row ) { + if ( isset( $setAugmentors[$name] ) ) { + throw new InvalidArgumentException( "Both row and set augmentors are defined for $name" ); + } + $setAugmentors[$name] = new PerRowAugmentor( $row ); + } + + foreach ( $setAugmentors as $name => $augmentor ) { + $data = $augmentor->augmentAll( $resultSet ); + if ( $data ) { + $resultSet->setAugmentedData( $name, $data ); + } + } + } } /** diff --git a/includes/search/SearchNearMatchResultSet.php b/includes/search/SearchNearMatchResultSet.php index 6d66707..3141797 100644 --- a/includes/search/SearchNearMatchResultSet.php +++ b/includes/search/SearchNearMatchResultSet.php @@ -21,7 +21,7 @@ return false; } $this->fetched = true; - return SearchResult::newFromTitle( $this->result ); + return SearchResult::newFromTitle( $this->result, $this ); } public function rewind() { diff --git a/includes/search/SearchResult.php b/includes/search/SearchResult.php index 21effbb..50db84b 100644 --- a/includes/search/SearchResult.php +++ b/includes/search/SearchResult.php @@ -57,14 +57,24 @@ protected $searchEngine; /** + * A set of extension data. + * @var array[] + */ + protected $extensionData; + + /** * Return a new SearchResult and initializes it with a title. * - * @param Title $title + * @param Title $title + * @param SearchResultSet $parentSet * @return SearchResult */ - public static function newFromTitle( $title ) { + public static function newFromTitle( $title, SearchResultSet $parentSet = null ) { $result = new static(); $result->initFromTitle( $title ); + if ( $parentSet ) { + $parentSet->augmentResult( $result ); + } return $result; } @@ -250,4 +260,24 @@ function isFileMatch() { return false; } + + /** + * Get the extension data as: + * augmentor name => data + * @return array[] + */ + public function getExtensionData() { + return $this->extensionData; + } + + /** + * Set extension data for this result. + * The data is: + * augmentor name => data + * @param array[] $extensionData + */ + public function setExtensionData( array $extensionData ) { + $this->extensionData = $extensionData; + } + } diff --git a/includes/search/SearchResultSet.php b/includes/search/SearchResultSet.php index 69795e7..978db27 100644 --- a/includes/search/SearchResultSet.php +++ b/includes/search/SearchResultSet.php @@ -42,6 +42,29 @@ protected $containedSyntax = false; + /** + * Cache of titles. + * Lists titles of the result set, in the same order as results. + * @var Title[] + */ + private $titles; + + /** + * Cache of results - serialization of the result iterator + * as an array. + * @var SearchResult[] + */ + private $results; + + /** + * Set of result's extra data, indexed per result id + * and then per data item name. + * The structure is: + * PAGE_ID => [ augmentor name => data, ... ] + * @var array[] + */ + protected $extraData = []; + public function __construct( $containedSyntax = false ) { $this->containedSyntax = $containedSyntax; } @@ -147,15 +170,15 @@ /** * Fetches next search result, or false. * STUB - * - * @return SearchResult + * FIXME: refactor as iterator, so we could use nicer interfaces. + * @return SearchResult|false */ function next() { return false; } /** - * Rewind result set back to begining + * Rewind result set back to beginning */ function rewind() { } @@ -176,4 +199,69 @@ public function searchContainedSyntax() { return $this->containedSyntax; } + + /** + * Extract all the results in the result set as array. + * @return SearchResult[] + */ + public function extractResults() { + if ( is_null( $this->results ) ) { + $this->results = []; + if ( $this->numRows() == 0 ) { + // Don't bother if we've got empty result + return $this->results; + } + $this->rewind(); + while ( ( $result = $this->next() ) != false ) { + $this->results[] = $result; + } + $this->rewind(); + } + return $this->results; + } + + /** + * Extract all the titles in the result set. + * @return Title[] + */ + public function extractTitles() { + if ( is_null( $this->titles ) ) { + if ( $this->numRows() == 0 ) { + // Don't bother if we've got empty result + $this->titles = []; + } else { + $this->titles = array_map( + function ( SearchResult $result ) { + return $result->getTitle(); + }, + $this->extractResults() ); + } + } + return $this->titles; + } + + /** + * Sets augmented data for result set. + * @param string $name Extra data item name + * @param array[] $data Extra data as PAGEID => data + */ + public function setAugmentedData( $name, $data ) { + foreach ( $data as $id => $resultData ) { + $this->extraData[$id][$name] = $resultData; + } + } + + /** + * Returns extra data for specific result and store it in SearchResult object. + * @param SearchResult $result + * @return array|null List of data as name => value or null if none present. + */ + public function augmentResult( SearchResult $result ) { + $id = $result->getTitle()->getArticleID(); + if ( !$id || !isset( $this->extraData[$id] ) ) { + return null; + } + $result->setExtensionData( $this->extraData[$id] ); + return $this->extraData[$id]; + } } diff --git a/includes/search/SqlSearchResultSet.php b/includes/search/SqlSearchResultSet.php index 6b60899..c3985d1 100644 --- a/includes/search/SqlSearchResultSet.php +++ b/includes/search/SqlSearchResultSet.php @@ -37,7 +37,7 @@ } return SearchResult::newFromTitle( - Title::makeTitle( $row->page_namespace, $row->page_title ) + Title::makeTitle( $row->page_namespace, $row->page_title ), $this ); } diff --git a/includes/specials/SpecialSearch.php b/includes/specials/SpecialSearch.php index 26b86f9..6daf19f 100644 --- a/includes/specials/SpecialSearch.php +++ b/includes/specials/SpecialSearch.php @@ -403,6 +403,7 @@ // show results if ( $numTextMatches > 0 ) { + $search->augmentSearchResults( $textMatches ); $out->addHTML( $this->showMatches( $textMatches ) ); } @@ -716,7 +717,7 @@ * * @return string */ - protected function showMatches( &$matches, $interwiki = null ) { + protected function showMatches( $matches, $interwiki = null ) { global $wgContLang; $terms = $wgContLang->convertForSearchResult( $matches->termMatches() ); @@ -725,7 +726,7 @@ $pos = $this->offset; if ( $result && $interwiki ) { - $out .= $this->interwikiHeader( $interwiki, $result ); + $out .= $this->interwikiHeader( $interwiki, $matches ); } $out .= "<ul class='mw-search-results'>\n"; @@ -750,7 +751,7 @@ * * @return string */ - protected function showHit( $result, $terms, $position ) { + protected function showHit( SearchResult $result, $terms, $position ) { if ( $result->isBrokenTitle() ) { return ''; diff --git a/tests/phpunit/includes/search/SearchEngineTest.php b/tests/phpunit/includes/search/SearchEngineTest.php index 0b34b6f..902fc9e 100644 --- a/tests/phpunit/includes/search/SearchEngineTest.php +++ b/tests/phpunit/includes/search/SearchEngineTest.php @@ -207,4 +207,50 @@ $this->assertArrayHasKey( 'testData', $mapping ); $this->assertEquals( 'test', $mapping['testData'] ); } + + public function hookSearchIndexFields( $mockFieldBuilder, &$fields, SearchEngine $engine ) { + $fields['testField'] = $mockFieldBuilder( "testField", SearchIndexField::INDEX_TYPE_TEXT ); + return true; + } + + public function testAugmentorSearch() { + $this->search->setNamespaces( [ 0, 1, 4 ] ); + $resultSet = $this->search->searchText( 'smithee' ); + // Not using mock since PHPUnit mocks do not work properly with references in params + $this->mergeMwGlobalArrayValue( 'wgHooks', + [ 'SearchResultsAugment' => [ [ $this, 'addAugmentors' ] ] ] ); + $this->search->augmentSearchResults( $resultSet ); + for ( $result = $resultSet->next(); $result; $result = $resultSet->next() ) { + $id = $result->getTitle()->getArticleID(); + $augmentData = "Result:$id:" . $result->getTitle()->getText(); + $augmentData2 = "Result2:$id:" . $result->getTitle()->getText(); + $this->assertEquals( [ 'testSet' => $augmentData, 'testRow' => $augmentData2 ], + $result->getExtensionData() ); + } + } + + public function addAugmentors( &$setAugmentors, &$rowAugmentors ) { + $setAugmentor = $this->getMock( 'ResultSetAugmentor' ); + $setAugmentor->expects( $this->once() ) + ->method( 'augmentAll' ) + ->willReturnCallback( function ( SearchResultSet $resultSet ) { + $data = []; + for ( $result = $resultSet->next(); $result; $result = $resultSet->next() ) { + $id = $result->getTitle()->getArticleID(); + $data[$id] = "Result:$id:" . $result->getTitle()->getText(); + } + $resultSet->rewind(); + return $data; + } ); + $setAugmentors['testSet'] = $setAugmentor; + + $rowAugmentor = $this->getMock( 'ResultAugmentor' ); + $rowAugmentor->expects( $this->exactly( 2 ) ) + ->method( 'augment' ) + ->willReturnCallback( function ( SearchResult $result ) { + $id = $result->getTitle()->getArticleID(); + return "Result2:$id:" . $result->getTitle()->getText(); + } ); + $rowAugmentors['testRow'] = $rowAugmentor; + } } -- To view, visit https://gerrit.wikimedia.org/r/285544 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ia5413a7846cc961026a2dc3542b619493bc76a23 Gerrit-PatchSet: 19 Gerrit-Project: mediawiki/core Gerrit-Branch: master Gerrit-Owner: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: Aude <aude.w...@gmail.com> Gerrit-Reviewer: DCausse <dcau...@wikimedia.org> Gerrit-Reviewer: Daniel Kinzler <daniel.kinz...@wikimedia.de> Gerrit-Reviewer: EBernhardson <ebernhard...@wikimedia.org> Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits