Mschwarzer has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/329626 )

Change subject: Add Citolytics query prefix
......................................................................

Add Citolytics query prefix

An additional query prefix (citolytics:) allows retrieval of link-based article 
recommendations via Citolytics. Recommendations are stored in a separate 
Elasticsearch index (citolytics_content). The new class 
CirrusSearch\Search\CitolyticsResultsType transforms them into article 
result sets.

The Citolytics project should improve the mobile recommendations.

Issue: T142477
Change-Id: I7525eef60c60ce747d194321c552a3df22d96d8f
---
M autoload.php
A includes/Query/CitolyticsFeature.php
M includes/Query/FullTextQueryStringQueryBuilder.php
M includes/Search/ResultsType.php
M includes/Search/SearchContext.php
M includes/Searcher.php
A tests/unit/Query/CitolyticsFeatureTest.php
7 files changed, 302 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/26/329626/1

diff --git a/autoload.php b/autoload.php
index 33c806c..6d838ad 100644
--- a/autoload.php
+++ b/autoload.php
@@ -186,4 +186,7 @@
        'CirrusSearch\\UserTesting' => __DIR__ . '/includes/UserTesting.php',
        'CirrusSearch\\Util' => __DIR__ . '/includes/Util.php',
        'CirrusSearch\\Version' => __DIR__ . '/includes/Version.php',
+       'CirrusSearch\\Search\\CitolyticsResultsType' => __DIR__ . 
'/includes/Search/ResultsType.php',
+       'CirrusSearch\\Query\\CitolyticsFeature' => __DIR__ . 
'/includes/Query/CitolyticsFeature.php',
+
 ];
diff --git a/includes/Query/CitolyticsFeature.php 
b/includes/Query/CitolyticsFeature.php
new file mode 100644
index 0000000..f230daa
--- /dev/null
+++ b/includes/Query/CitolyticsFeature.php
@@ -0,0 +1,37 @@
+<?php
+
+namespace CirrusSearch\Query;
+
+use CirrusSearch\Search\CitolyticsResultsType;
+use CirrusSearch\Search\SearchContext;
+
+class CitolyticsFeature implements KeywordFeature {
+       /** @const string query prefix that triggers Citolytics */
+       const CITOLYTICS_PREFIX = 'citolytics:';
+
+       const CITOLYTICS_FIELD = 'title';
+       const CITOLYTICS_INDEX_BASE = 'citolytics';
+
+       /**
+        * Greedily match the entire $term as a citolytics query.
+        *
+        * @param SearchContext $context
+        * @param string $term
+        * @return string
+        */
+       public function apply( SearchContext $context, $term ) {
+               if ( substr( $term, 0, strlen( self::CITOLYTICS_PREFIX ) ) === 
self::CITOLYTICS_PREFIX ) {
+
+                       $term = substr( $term, strlen( self::CITOLYTICS_PREFIX 
) );
+                       $context->setExtraIndexBaseName( 
self::CITOLYTICS_INDEX_BASE );
+                       $context->setExtraResultsType( new 
CitolyticsResultsType() );
+
+                       $query = new \Elastica\Query\Match( 
self::CITOLYTICS_FIELD, $term );
+                       $context->setMainQuery( $query );
+
+                       return '';
+               }
+
+               return $term;
+       }
+}
\ No newline at end of file
diff --git a/includes/Query/FullTextQueryStringQueryBuilder.php 
b/includes/Query/FullTextQueryStringQueryBuilder.php
index 2374fca..789d667 100644
--- a/includes/Query/FullTextQueryStringQueryBuilder.php
+++ b/includes/Query/FullTextQueryStringQueryBuilder.php
@@ -53,6 +53,11 @@
                        $term = $feature->apply( $searchContext, $term );
                }
 
+               // Skip if query was already set by a feature
+               if ( !$searchContext->getQuery() instanceof 
\Elastica\Query\MatchAll ) {
+                       return;
+               }
+
                if ( !$searchContext->areResultsPossible() ) {
                        return;
                }
diff --git a/includes/Search/ResultsType.php b/includes/Search/ResultsType.php
index 38780a3..33aff08 100644
--- a/includes/Search/ResultsType.php
+++ b/includes/Search/ResultsType.php
@@ -640,3 +640,70 @@
                return new EmptyResultSet();
        }
 }
+
+/**
+ * Result type for a Citolytics search.
+ */
+class CitolyticsResultsType implements ResultsType {
+       /**
+        * @return false|string|array corresponding to Elasticsearch source 
filtering syntax
+        */
+       public function getSourceFiltering() {
+               return [ 'id', 'title', 'namespace', 'redirect.*', 'timestamp', 
'text_bytes', 'related_content' ];
+       }
+
+       /**
+        * @return array
+        */
+       public function getFields() {
+               return array(); // all data is stored in source field.
+       }
+
+       /**
+        * @param array $highlightSource
+        * @return array|null
+        */
+       public function getHighlightingConfiguration( array $highlightSource ) {
+               return null;
+       }
+
+       /**
+        * Citolytics recommendations are stored as an array in the 
citolytics_content index. Array elements need to be transformed
+        * into a regular ES result set to be accessible via CirrusSearch as 
individual search results.
+        *
+        * @param SearchContext $context
+        * @param \Elastica\ResultSet $result
+        * @return ResultSet
+        */
+       public function transformElasticsearchResult( SearchContext $context, 
\Elastica\ResultSet $result ) {
+               $docs = $result->getDocuments();
+               $key_value_index = 0; // If multiple-index queries are used
+
+               // Check for empty results
+               if ( count( $docs ) < 1 || !isset( $docs[$key_value_index] ) || 
!isset( $docs[$key_value_index]->getData()['related_content'] ) || count( 
$docs[$key_value_index]->getData()['related_content'] ) < 1 ) {
+                       return new EmptyResultSet();
+               }
+
+               $relatedContent = 
$docs[$key_value_index]->getData()['related_content'];
+               // Overwrite hits in original response with names
+               $overwritten = $result->getResponse()->getData();
+               $overwritten['hits'] = array( 'total' => count( $relatedContent 
), // Needs to be set for pagination
+                       'max_score' => 1, 'hits' => array() );
+               // Generate artificial search results
+               foreach ( $relatedContent as $data ) {
+                       $overwritten['hits']['hits'][] = array( '_score' => 1, 
'_source' => array( 'title' => $data['title'], 'namespace' => 0 ), 'fields' => 
array(), );
+               }
+               // Build new result set from original query and overwritten 
response
+               $response = new \Elastica\Response( $overwritten, 
$result->getResponse()->getStatus() );
+               $result = \Elastica\ResultSet::create( $response, 
$result->getQuery() );
+
+               return new ResultSet( $context->getSuggestPrefixes(), 
$context->getSuggestSuffixes(), $result, $context->isSyntaxUsed(), 
$context->getConfig() );
+       }
+
+       /**
+        * @return EmptyResultSet
+        */
+       public function createEmptyResult() {
+               return new EmptyResultSet();
+       }
+}
\ No newline at end of file
diff --git a/includes/Search/SearchContext.php 
b/includes/Search/SearchContext.php
index 380edc4..725819e 100644
--- a/includes/Search/SearchContext.php
+++ b/includes/Search/SearchContext.php
@@ -720,4 +720,37 @@
                $this->extraScoreBuilders[] = $rescore;
        }
 
+       /**
+        * @var string
+        */
+       private $extraIndexBaseName;
+
+       /**
+        * @var ResultsType
+        */
+       private $extraResultsType;
+
+       public function hasExtraIndexBaseName() {
+               return isset($this->extraIndexBaseName);
+       }
+
+       public function getExtraIndexBaseName() {
+               return $this->extraIndexBaseName;
+       }
+
+       public function setExtraIndexBaseName($indexBaseName) {
+               $this->extraIndexBaseName = $indexBaseName;
+       }
+
+       public function hasExtraResultsType() {
+               return isset($this->extraResultsType);
+       }
+
+       public function getExtraResultsType() {
+               return $this->extraResultsType;
+       }
+
+       public function setExtraResultsType($resultsType) {
+               $this->extraResultsType = $resultsType;
+       }
 }
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 14a5cf7..e9f60c4 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -3,6 +3,7 @@
 namespace CirrusSearch;
 
 use CirrusSearch\Query\SimpleKeywordFeature;
+use CirrusSearch\Search\CitolyticsResultsType;
 use CirrusSearch\Search\FullTextResultsType;
 use CirrusSearch\Search\ResultsType;
 use CirrusSearch\Search\RescoreBuilder;
@@ -301,6 +302,8 @@
                        // very first item until combining with other queries
                        // is worked out.
                        new Query\MoreLikeFeature( $this->config ),
+                       // Handle Citolytics prefix (greedy)
+                       new Query\CitolyticsFeature(),
                        // Handle title prefix notation (greedy)
                        new Query\PrefixFeature(),
                        // Handle prefer-recent keyword
@@ -348,8 +351,6 @@
                        $builderSettings['settings']
                );
 
-
-
                if ( !( $qb instanceof FullTextQueryBuilder ) ) {
                        throw new RuntimeException( "Bad builder class 
configured: {$builderSettings['builder_class']}" );
                }
@@ -376,6 +377,14 @@
 
                $qb = $this->buildFullTextSearch( $term, $showSuggestion );
 
+               if ( $this->searchContext->hasExtraIndexBaseName() ) {
+                       $this->indexBaseName = 
$this->searchContext->getExtraIndexBaseName();
+               }
+
+               if ( $this->searchContext->hasExtraResultsType() ) {
+                       $this->setResultsType( 
$this->searchContext->getExtraResultsType() );
+               }
+
                $status = $this->searchOne();
                if ( !$status->isOK() && ElasticaErrorHandler::isParseError( 
$status ) ) {
                        if ( $qb->buildDegraded( $this->searchContext ) ) {
diff --git a/tests/unit/Query/CitolyticsFeatureTest.php 
b/tests/unit/Query/CitolyticsFeatureTest.php
new file mode 100644
index 0000000..4389b47
--- /dev/null
+++ b/tests/unit/Query/CitolyticsFeatureTest.php
@@ -0,0 +1,146 @@
+<?php
+
+namespace CirrusSearch\Query;
+
+use CirrusSearch\CirrusTestCase;
+use CirrusSearch\Connection;
+use CirrusSearch\Search\CitolyticsResultsType;
+use CirrusSearch\Search\SearchContext;
+use MediaWiki\MediaWikiServices;
+
+/**
+ * Test Citolytics keyword feature.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @group CirrusSearch
+ */
+class CitolyticsFeatureTest extends CirrusTestCase {
+       /**
+        * @var \Elastica\Client
+        */
+       private $client;
+
+       /**
+        * @var \CirrusSearch\SearchConfig
+        */
+       private $config;
+
+       protected $testDataIds = [ ];
+       protected $testData = [ array( 'title' => 'Foo', 'category' => 'Some 
category', 'namespace' => 0, 'related_content' => [ array( 'title' => 'Some 
related page', 'score' => 0.9 ), array( 'title' => 'Another related page', 
'score' => 0.8 ) ] ), array( 'title' => 'Bar', 'category' => 'Some category', 
'namespace' => 0, 'related_content' => [ array( 'title' => 'Foo', 'score' => 
0.9 ), array( 'title' => 'Foo page', 'score' => 0.8 ), array( 'title' => 'Foo 
related', 'score' => 0.3 ), array( 'title' => 'Foo Bar', 'score' => 0.1 ), ] ), 
];
+
+       private function getPath() {
+               return CitolyticsFeature::CITOLYTICS_INDEX_BASE . 
'_content/page';
+       }
+
+       protected function setUp() {
+               parent::setUp();
+
+               $this->config = 
MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 
'CirrusSearch' );
+               $this->client = ( new Connection( $this->config ) 
)->getClient();
+
+               $this->insertTestData();
+       }
+
+       protected function tearDown() {
+               $this->removeTestData();
+               parent::tearDown();
+       }
+
+       private function insertTestData() {
+               foreach ( $this->testData as $data ) {
+                       $response = $this->client->request( $this->getPath(), 
\Elastica\Request::POST, $data );
+
+                       $this->assertTrue( $response->isOk(), 'Creating test 
data failed' );
+
+                       $this->testDataIds[] = $response->getData()['_id'];
+               }
+       }
+
+       private function removeTestData() {
+               foreach ( $this->testDataIds as $id ) {
+                       $response = $this->client->request( $this->getPath() . 
'/' . $id, \Elastica\Request::DELETE );
+
+                       $this->assertTrue( $response->isOk(), 'Removing test 
data failed' );
+               }
+               $this->testDataIds = [ ];
+       }
+
+       /**
+        * Data provider for testApply
+        *
+        * @return array data for testApply
+        */
+       public function applyProvider() {
+               return [ 'unrelated queries' => [ 'some query', new 
\Elastica\Query\MatchAll(), null, null ], 'citolytics query for some page' => [ 
CitolyticsFeature::CITOLYTICS_PREFIX . 'Some Title', new \Elastica\Query\Match( 
CitolyticsFeature::CITOLYTICS_FIELD, 'Some Title' ), 
CitolyticsFeature::CITOLYTICS_INDEX_BASE, new CitolyticsResultsType() ], ];
+       }
+
+       /**
+        * Tests CitolyticsFeature
+        *
+        * @dataProvider applyProvider
+        */
+       public function testApply( $term, $expectedQuery, 
$expectedIndexBaseName, $expectedResultsType ) {
+
+               $context = new SearchContext( $this->config );
+
+               // Finally run the test
+               $feature = new CitolyticsFeature();
+
+               $result = $feature->apply( $context, $term );
+
+               // Assert query
+               if ( $expectedQuery === null ) {
+                       $this->assertFalse( $context->areResultsPossible() );
+               } else {
+                       $this->assertEquals( $expectedQuery, 
$context->getQuery() );
+                       if ( $expectedQuery instanceof \Elastica\Query\MatchAll 
) {
+                               $this->assertEquals( $term, $result, 'Term must 
be unchanged' );
+                       } else {
+                               $this->assertEquals( '', $result, 'Term must be 
empty string' );
+                       }
+               }
+
+               // Assert IndexBase
+               if ( $expectedIndexBaseName === null ) {
+                       $this->assertFalse( $context->hasExtraIndexBaseName() );
+               } else {
+                       $this->assertEquals( $expectedIndexBaseName, 
$context->getExtraIndexBaseName(), 'IndexBaseName must be changed' );
+               }
+
+               // Assert ResultsType
+               if ( $expectedResultsType === null ) {
+                       $this->assertFalse( $context->hasExtraResultsType() );
+               } else {
+                       $this->assertEquals( $expectedResultsType, 
$context->getExtraResultsType() );
+               }
+       }
+
+       /**
+        * Tests Citolytics search and result transformation with testData
+        */
+       public function testSearcherAndResultsType() {
+               $engine = new \CirrusSearch();
+
+               foreach ( $this->testData as $data ) {
+                       // Query with prefix and title
+                       $status = $engine->searchText( 
CitolyticsFeature::CITOLYTICS_PREFIX . $data['title'] );
+
+                       // Validate number of hits transformed by 
CitolyticsResultsType
+                       $this->assertEquals( count( $data['related_content'] ), 
$status->getValue()->getTotalHits(), 'Invalid number of hits' );
+               }
+       }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/329626
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7525eef60c60ce747d194321c552a3df22d96d8f
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Mschwarzer <wikit...@i.mieo.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to