Smalyshev has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/405059 )

Change subject: [WIP] Create deepcat: keyword
......................................................................

[WIP] Create deepcat: keyword

Usage: deepcat:Vehicles searches the category Vehicles
and all of its subcategories.

Bug: T184840
Change-Id: I08f2c7dc10d205d3f190c34980ef37998b53cb32
---
M CirrusSearch.php
M autoload.php
A includes/Query/DeepcatFeature.php
M includes/Query/InCategoryFeature.php
M includes/Searcher.php
A includes/ServiceWiring.php
6 files changed, 173 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/59/405059/1

diff --git a/CirrusSearch.php b/CirrusSearch.php
index c887306..1a2a837 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -1391,6 +1391,20 @@
  */
 $wgCirrusSearchMaxPhraseTokens = null;
 
+/**
+ * URL of the SPARQL endpoint used to look up categories for the deepcat keyword.
+ */
+$wgCirrusSearchCategoryEndpoint = '';
+/**
+ * Max depth for deep category query.
+ */
+$wgCirrusSearchCategoryDepth = 5;
+/**
+ * Max number of categories returned by the deep category query.
+ */
+$wgCirrusSearchCategoryMax = 256;
+
+$wgServiceWiringFiles[] = __DIR__ . '/includes/ServiceWiring.php';
 /*
  * Please update docs/settings.txt if you add new values!
  */
diff --git a/autoload.php b/autoload.php
index 1c5f859..bf4d4c3 100644
--- a/autoload.php
+++ b/autoload.php
@@ -1,6 +1,6 @@
 <?php
 // This file is generated by scripts/gen-autoload.php, do not adjust manually
-// @codingStandardsIgnoreFile
+// phpcs:ignoreFile Generic.Files.LineLength
 global $wgAutoloadClasses;
 
 $wgAutoloadClasses += [
@@ -116,6 +116,7 @@
        'CirrusSearch\\Query\\CompSuggestQueryBuilder' => __DIR__ . 
'/includes/Query/CompSuggestQueryBuilder.php',
        'CirrusSearch\\Query\\ContentModelFeature' => __DIR__ . 
'/includes/Query/ContentModelFeature.php',
        'CirrusSearch\\Query\\CountContentWordsBuilder' => __DIR__ . 
'/includes/Query/CountContentWordsBuilder.php',
+       'CirrusSearch\\Query\\DeepcatFeature' => __DIR__ . 
'/includes/Query/DeepcatFeature.php',
        'CirrusSearch\\Query\\FileNumericFeature' => __DIR__ . 
'/includes/Query/FileNumericFeature.php',
        'CirrusSearch\\Query\\FileTypeFeature' => __DIR__ . 
'/includes/Query/FileTypeFeature.php',
        'CirrusSearch\\Query\\FullTextQueryBuilder' => __DIR__ . 
'/includes/Query/FullTextQueryBuilder.php',
diff --git a/includes/Query/DeepcatFeature.php 
b/includes/Query/DeepcatFeature.php
new file mode 100644
index 0000000..ba019a0
--- /dev/null
+++ b/includes/Query/DeepcatFeature.php
@@ -0,0 +1,137 @@
+<?php
+namespace CirrusSearch\Query;
+
+use CirrusSearch\Search\SearchContext;
+use CirrusSearch\SearchConfig;
+use Config;
+use MediaWiki\Sparql\SparqlClient;
+use MediaWiki\Sparql\SparqlException;
+use Title;
+
+/**
+ * Filters by category or its subcategories. E.g. if category Vehicles 
includes Cars
+ * and Boats, then search for Vehicles would match pages in Vehicles, Cars and 
Boats.
+ *
+ * Syntax:
+ *  deepcat:Vehicles
+ */
+class DeepcatFeature extends SimpleKeywordFeature {
+       /**
+        * Max lookup depth
+        * @var int
+        */
+       private $depth;
+       /**
+        * Max number of categories
+        * @var int
+        */
+       private $limit;
+       /**
+        * Category URL prefix for this wiki
+        * @var string
+        */
+       private $prefix;
+       /**
+        * @var SparqlClient
+        */
+       private $client;
+
+       /**
+        * @param Config $config
+        * @param SparqlClient $client
+        */
+       public function __construct( Config $config, SparqlClient $client ) {
+               $this->depth = (int)$config->get( 'CirrusSearchCategoryDepth' );
+               $this->limit = (int)$config->get( 'CirrusSearchCategoryMax' );
+               $this->prefix = $this->getCategoryPrefix();
+               $endpoint = $config->get( 'CirrusSearchCategoryEndpoint' );
+               if ( !empty( $endpoint ) ) {
+                       $this->client = $client;
+               }
+       }
+
+       /**
+        * @return string[] The list of keywords this feature is supposed to 
match
+        */
+       protected function getKeywords() {
+               return [ 'deepcat', 'deepcategory' ];
+       }
+
+       /**
+        * Applies the detected keyword from the search term. May apply changes
+        * either to $context directly, or return a filter to be added.
+        *
+        * @param SearchContext $context
+        * @param string $key The keyword
+        * @param string $value The value attached to the keyword with quotes 
stripped and escaped
+        *  quotes un-escaped.
+        * @param string $quotedValue The original value in the search string, 
including quotes if used
+        * @param bool $negated Is the search negated? Not used to generate the 
returned AbstractQuery,
+        *  that will be negated as necessary. Used for any other 
building/context necessary.
+        * @return array Two element array, first an AbstractQuery or null to 
apply to the
+        *  query. Second a boolean indicating if the quotedValue should be 
kept in the search
+        *  string.
+        */
+       protected function doApply( SearchContext $context, $key, $value, 
$quotedValue, $negated ) {
+               if ( empty( $this->client ) ) {
+                       $context->addWarning( 
'cirrussearch-feature-deepcat-endpoint' );
+                       return [ null, false ];
+               }
+
+               try {
+                       $categories = $this->fetchCategories( $value );
+               } catch( SparqlException $e ) {
+                       $context->addWarning( 
'cirrussearch-feature-deepcat-exception', $e->getMessage() );
+                       $categories = [ $value ];
+               }
+
+               $filter = new \Elastica\Query\BoolQuery();
+               foreach ( $categories as $cat ) {
+                       $filter->addShould( QueryHelper::matchPage( 
'category.lowercase_keyword', $cat ) );
+               }
+
+               return [ $filter, false ];
+       }
+
+       /**
+        * Get URL prefix for full category URL for this wiki.
+        * @return bool|string
+        */
+       private function getCategoryPrefix() {
+               $title = Title::makeTitle( NS_CATEGORY, 'ZZ' );
+               $fullName = $title->getFullURL( '', false, PROTO_CANONICAL );
+               return substr( $fullName, 0, - 2 );
+       }
+
+       /**
+        * Get child categories using SPARQL service.
+        * @param string $rootCategory Category to start looking from
+        * @return string[] List of subcategories.
+        * Note that the list may be incomplete due to limitations of the 
service.
+        * @throws SparqlException
+        */
+       private function fetchCategories( $rootCategory ) {
+               /** @var SparqlClient $client */
+               $title = Title::makeTitle( NS_CATEGORY, $rootCategory );
+               $fullName = $title->getFullURL( '', false, PROTO_CANONICAL );
+               $query = <<<SPARQL
+SELECT ?out WHERE {
+      SERVICE mediawiki:categoryTree {
+          bd:serviceParam mediawiki:start <$fullName> .
+          bd:serviceParam mediawiki:direction "Reverse" .
+          bd:serviceParam mediawiki:depth {$this->depth} .
+      }
+} ORDER BY ASC(?depth)
+LIMIT {$this->limit}
+SPARQL;
+               $result = $this->client->query( $query );
+
+               $prefixLen = strlen( $this->prefix );
+               return array_map( function ( $row ) use ( $prefixLen ) {
+                       // TODO: maybe we want to check the prefix is indeed 
the same?
+                       // It should be but who knows...
+                       return substr( $row['out'], $prefixLen );
+               }, $result );
+       }
+
+}
diff --git a/includes/Query/InCategoryFeature.php 
b/includes/Query/InCategoryFeature.php
index e364f86..7af035c 100644
--- a/includes/Query/InCategoryFeature.php
+++ b/includes/Query/InCategoryFeature.php
@@ -12,7 +12,7 @@
  * must follow the syntax `id:<id>`.
  *
  * We emulate template syntax here as best as possible, so things in NS_MAIN
- * are prefixed with ":" and things in NS_TEMPATE don't have a prefix at all.
+ * are prefixed with ":" and things in NS_TEMPLATE don't have a prefix at all.
  * Since we don't actually index templates like that, munge the query here.
  *
  * Examples:
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 51f2a19..1a5b2a8 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -307,6 +307,9 @@
                        new Query\ContentModelFeature(),
                        // subpageof keyword
                        new Query\SubPageOfFeature(),
+                       // deepcat keyword
+                       new Query\DeepcatFeature( $this->config,
+                               MediaWikiServices::getInstance()->getService( 
'CirrusCategoriesClient' ) ),
                ];
 
                $extraFeatures = [];
diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php
new file mode 100644
index 0000000..b4e39b8
--- /dev/null
+++ b/includes/ServiceWiring.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Services for the CirrusSearch extension
+ */
+
+use MediaWiki\MediaWikiServices;
+use MediaWiki\Sparql\SparqlClient;
+
+return [
+       // SPARQL client for deep category search
+       'CirrusCategoriesClient' => function ( MediaWikiServices $services ) {
+               $config = $services->getMainConfig();
+               return new SparqlClient( $config->get( 
'CirrusSearchCategoryEndpoint' ),
+                       $services->getHttpRequestFactory() );
+       },
+];
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/405059
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I08f2c7dc10d205d3f190c34980ef37998b53cb32
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to