EBernhardson has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/398124 )

Change subject: Resolve redirect namespaces from source docs in fancy title 
results type
......................................................................

Resolve redirect namespaces from source docs in fancy title results type

Rather than figuring out an appropriate namespace for redirects we
used the namespace of the document redirected to which is regularly
wrong, especially for 'shorthand' redirects on wikis such as
CAT:PROD on enwiki in NS_MAIN which redirects to Category:Proposed
Deletion (any many other similar shortcuts). Dig through the redirects
stored with the document and figure out what the likely namespace
of the document is.

Change-Id: I8de5c9d35ed709ee100cc3ad8093e49a7a5476d3
---
M includes/CompletionSuggester.php
M includes/Search/ResultsType.php
M tests/unit/Search/ResultsTypeTest.php
3 files changed, 171 insertions(+), 17 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/24/398124/1

diff --git a/includes/CompletionSuggester.php b/includes/CompletionSuggester.php
index e553aca..bfcf045 100644
--- a/includes/CompletionSuggester.php
+++ b/includes/CompletionSuggester.php
@@ -283,9 +283,10 @@
                // they'll be forgotten in client response
                $score = $collector->getMinScore() !== null ? 
$collector->getMinScore() - 1 : count( $prefixResults->getResults() );
 
+               $namespaces = $this->searchContext->getNamespaces();
                foreach ( $prefixResults->getResults() as $res ) {
                        $pageId = $this->config->makePageId( $res->getId() );
-                       $title = 
FancyTitleResultsType::chooseBestTitleOrRedirect( 
$rType->transformOneElasticResult( $res ) );
+                       $title = 
FancyTitleResultsType::chooseBestTitleOrRedirect( 
$rType->transformOneElasticResult( $res, $namespaces ) );
                        if ( $title === false ) {
                                continue;
                        }
diff --git a/includes/Search/ResultsType.php b/includes/Search/ResultsType.php
index 372ef36..39faeff 100644
--- a/includes/Search/ResultsType.php
+++ b/includes/Search/ResultsType.php
@@ -128,6 +128,10 @@
                $this->matchedAnalyzer = $matchedAnalyzer;
        }
 
+       public function getSourceFiltering() {
+               return [ 'namespace', 'title', 'namespace_text', 'wiki', 
'redirect' ];
+       }
+
        /**
         * @param array $highlightSource
         * @return array|null
@@ -223,11 +227,12 @@
         * Transform a result from elastic into an array of Titles.
         *
         * @param \Elastica\Result $r
+        * @param int[] $namespaces Prefer
         * @return \Title[] with the following keys :
         *   titleMatch => a title if the title matched
         *   redirectMatches => an array of redirect matches, one per matched 
redirect
         */
-       public function transformOneElasticResult( \Elastica\Result $r ) {
+       public function transformOneElasticResult( \Elastica\Result $r, array 
$namespaces = [] ) {
                $title = TitleHelper::makeTitle( $r );
                $highlights = $r->getHighlights();
                $resultForTitle = [];
@@ -251,21 +256,16 @@
                                        
$highlights["redirect.title.{$this->matchedAnalyzer}_asciifolding"] );
                }
                if ( count( $redirectHighlights ) !== 0 ) {
-                       foreach ( $redirectHighlights as $redirectTitle ) {
-                               // The match was against a redirect so we 
should replace the $title with one that
-                               // represents the redirect.
-                               // The first step is to strip the actual 
highlighting from the title.
-                               $redirectTitle = str_replace( [ 
Searcher::HIGHLIGHT_PRE, Searcher::HIGHLIGHT_POST ],
-                                       '', $redirectTitle );
-
-                               // Instead of getting the redirect's real 
namespace we're going to just use the namespace
-                               // of the title.  This is not great but OK 
given that we can't find cross namespace
-                               // redirects properly any way.
-                               // TODO: ask the highlighter to return the 
namespace for this kind of matches
-                               // this would perhaps help to partially fix 
T115756
-                               $redirectTitle =
-                                       TitleHelper::makeRedirectTitle( $r, 
$redirectTitle, $r->namespace );
-                               $resultForTitle['redirectMatches'][] = 
$redirectTitle;
+                       $source = $r->getSource();
+                       $docRedirects = [];
+                       if ( isset( $source['redirect'] ) ) {
+                               foreach ( $source['redirect'] as $docRedir ) {
+                                       $docRedirects[$docRedir['title']][] = 
$docRedir;
+                               }
+                       }
+                       foreach ( $redirectHighlights as $redirectTitleString ) 
{
+                               $resultForTitle['redirectMatches'][] = 
$this->resolveRedirectHighlight(
+                                       $r, $redirectTitleString, 
$docRedirects, $namespaces );
                        }
                }
                if ( count( $resultForTitle ) === 0 ) {
@@ -278,6 +278,43 @@
 
                return $resultForTitle;
        }
+
+       /**
+        * @param \Elastica\Result $r Elasticsearch result
+        * @param string $redirectTitleString Highlighted string returned from 
elasticsearch
+        * @param array $docRedirects Map from title string to list of 
redirects from elasticsearch source document
+        * @param int[] $namespaces Prefered namespaces to source redirects from
+        */
+       private function resolveRedirectHighlight( \Elastica\Result $r, 
$redirectTitleString, array $docRedirects, $namespaces ) {
+               // The match was against a redirect so we should replace the 
$title with one that
+               // represents the redirect.
+               // The first step is to strip the actual highlighting from the 
title.
+               $redirectTitleString = str_replace( [ Searcher::HIGHLIGHT_PRE, 
Searcher::HIGHLIGHT_POST ],
+                       '', $redirectTitleString );
+
+               if ( !isset( $docRedirects[$redirectTitleString] ) ) {
+                       // Instead of getting the redirect's real namespace 
we're going to just use the namespace
+                       // of the title.  This is not great.
+                       // TODO: Should we just bail at this point?
+                       return TitleHelper::makeRedirectTitle( $r, 
$redirectTitleString, $r->namespace );
+               }
+
+               $redirs = $docRedirects[$redirectTitleString];
+               if ( count( $redirs ) === 1 ) {
+                       // may or may not be the right namespace, but we don't 
seem to have any other options.
+                       return TitleHelper::makeRedirectTitle( $r, 
$redirectTitleString, $redirs[0]['namespace'] );
+               }
+
+               if ( $namespaces ) {
+                       foreach ( $redirs as $redir ) {
+                               if ( array_search( $redir['namespace'], 
$namespaces ) ) {
+                                       return TitleHelper::makeRedirectTitle( 
$r, $redirectTitleString, $redir['namespace'] );
+                               }
+                       }
+               }
+               // Multiple redirects with same text from different namespaces, 
but none of them match the requested namespaces. What now?
+               return TitleHelper::makeRedirectTitle( $r, 
$redirectTitleString, $redirs[0]['namespace'] );
+       }
 }
 
 /**
diff --git a/tests/unit/Search/ResultsTypeTest.php 
b/tests/unit/Search/ResultsTypeTest.php
index 03695a7..24eba6a 100644
--- a/tests/unit/Search/ResultsTypeTest.php
+++ b/tests/unit/Search/ResultsTypeTest.php
@@ -3,6 +3,7 @@
 namespace CirrusSearch\Search;
 
 use CirrusSearch\CirrusTestCase;
+use CirrusSearch\Searcher;
 
 /**
  * Test escaping search strings.
@@ -225,4 +226,119 @@
                        ],
                ];
        }
+
+       public function fancyRedirectHandlingProvider() {
+               return [
+                       'typical title only match' => [
+                               'Trebuchet',
+                               [
+                                       '_source' => [
+                                               'namespace_text' => '',
+                                               'namespace' => 0,
+                                               'title' => 'Trebuchet',
+                                       ],
+                               ],
+                       ],
+                       'partial title match' => [
+                               'Trebuchet',
+                               [
+                                       'highlight' => [
+                                               'title.prefix' => [
+                                                       Searcher::HIGHLIGHT_PRE 
. 'Trebu' . Searcher::HIGHLIGHT_POST . 'chet',
+                                               ],
+                                       ],
+                                       '_source' => [
+                                               'namespace_text' => '',
+                                               'namespace' => 0,
+                                               'title' => 'Trebuchet',
+                                       ],
+                               ],
+                       ],
+                       'full redirect match same namespace' => [
+                               'Pierriere',
+                               [
+                                       'highlight' => [
+                                               'redirect.title.prefix' => [
+                                                       Searcher::HIGHLIGHT_PRE 
. 'Pierriere' . Searcher::HIGHLIGHT_POST,
+                                               ],
+                                       ],
+                                       '_source' => [
+                                               'namespace_text' => '',
+                                               'namespace' => 0,
+                                               'title' => 'Trebuchet',
+                                               'redirect' => [
+                                                       [ 'namespace' => 0, 
'title' => 'Pierriere' ]
+                                               ],
+                                       ],
+                               ],
+                       ],
+                       'full redirect match other namespace' => [
+                               'Category:Pierriere',
+                               [
+                                       'highlight' => [
+                                               'redirect.title.prefix' => [
+                                                       Searcher::HIGHLIGHT_PRE 
. 'Pierriere' . Searcher::HIGHLIGHT_POST,
+                                               ],
+                                       ],
+                                       '_source' => [
+                                               'namespace_text' => '',
+                                               'namespace' => 0,
+                                               'title' => 'Trebuchet',
+                                               'redirect' => [
+                                                       [ 'namespace' => 14, 
'title' => 'Pierriere' ]
+                                               ],
+                                       ],
+                               ],
+                       ],
+                       'partial redirect match other namespace' => [
+                               'Category:Pierriere',
+                               [
+                                       'highlight' => [
+                                               'redirect.title.prefix' => [
+                                                       Searcher::HIGHLIGHT_PRE 
. 'Pi' . Searcher::HIGHLIGHT_POST . 'erriere',
+                                               ],
+                                       ],
+                                       '_source' => [
+                                               'namespace_text' => '',
+                                               'namespace' => 0,
+                                               'title' => 'Trebuchet',
+                                               'redirect' => [
+                                                       [ 'namespace' => 14, 
'title' => 'Pierriere' ]
+                                               ],
+                                       ],
+                               ],
+                       ],
+                       'multiple redirect namespace matches' => [
+                               'User:Pierriere',
+                               [
+                                       'highlight' => [
+                                               'redirect.title.prefix' => [
+                                                       Searcher::HIGHLIGHT_PRE 
. 'Pierriere' . Searcher::HIGHLIGHT_POST,
+                                               ],
+                                       ],
+                                       '_source' => [
+                                               'namespace_text' => '',
+                                               'namespace' => 0,
+                                               'title' => 'Trebuchet',
+                                               'redirect' => [
+                                                       [ 'namespace' => 14, 
'title' => 'Pierriere' ],
+                                                       [ 'namespace' => 2, 
'title' => 'Pierriere' ],
+                                               ],
+                                       ],
+                               ],
+                               [ 0, 2 ]
+                       ],
+               ];
+       }
+
+       /**
+        * @dataProvider fancyRedirectHandlingProvider
+        */
+       public function testFancyRedirectHandling( $expected, $hit, array 
$namespaces = [] ) {
+               $type = new FancyTitleResultsType( 'prefix' );
+               $result = new \Elastica\Result( $hit );
+               $matches = $type->transformOneElasticResult( $result, 
$namespaces );
+               $title = FancyTitleResultsType::chooseBesttitleOrRedirect( 
$matches );
+               $this->assertEquals( $expected, $title->getPrefixedText() );
+       }
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/398124
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I8de5c9d35ed709ee100cc3ad8093e49a7a5476d3
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to