Smalyshev has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/405059 )
Change subject: [WIP] Create deepcat: keyword
......................................................................
[WIP] Create deepcat: keyword
Usage: deepcat:Vehicles searches the category Vehicles
and all of its subcategories.
Bug: T184840
Change-Id: I08f2c7dc10d205d3f190c34980ef37998b53cb32
---
M CirrusSearch.php
M autoload.php
A includes/Query/DeepcatFeature.php
M includes/Query/InCategoryFeature.php
M includes/Searcher.php
A includes/ServiceWiring.php
6 files changed, 173 insertions(+), 2 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/59/405059/1
diff --git a/CirrusSearch.php b/CirrusSearch.php
index c887306..1a2a837 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -1391,6 +1391,20 @@
*/
$wgCirrusSearchMaxPhraseTokens = null;
+/**
+ * URL of the SPARQL endpoint used to look up categories for the deepcat keyword.
+ */
+$wgCirrusSearchCategoryEndpoint = '';
+/**
+ * Max depth for deep category query.
+ */
+$wgCirrusSearchCategoryDepth = 5;
+/**
+ * Max result count for deep category query.
+ */
+$wgCirrusSearchCategoryMax = 256;
+
+$wgServiceWiringFiles[] = __DIR__ . '/includes/ServiceWiring.php';
/*
* Please update docs/settings.txt if you add new values!
*/
diff --git a/autoload.php b/autoload.php
index 1c5f859..bf4d4c3 100644
--- a/autoload.php
+++ b/autoload.php
@@ -1,6 +1,6 @@
<?php
// This file is generated by scripts/gen-autoload.php, do not adjust manually
-// @codingStandardsIgnoreFile
+// phpcs:ignoreFile Generic.Files.LineLength
global $wgAutoloadClasses;
$wgAutoloadClasses += [
@@ -116,6 +116,7 @@
'CirrusSearch\\Query\\CompSuggestQueryBuilder' => __DIR__ .
'/includes/Query/CompSuggestQueryBuilder.php',
'CirrusSearch\\Query\\ContentModelFeature' => __DIR__ .
'/includes/Query/ContentModelFeature.php',
'CirrusSearch\\Query\\CountContentWordsBuilder' => __DIR__ .
'/includes/Query/CountContentWordsBuilder.php',
+ 'CirrusSearch\\Query\\DeepcatFeature' => __DIR__ .
'/includes/Query/DeepcatFeature.php',
'CirrusSearch\\Query\\FileNumericFeature' => __DIR__ .
'/includes/Query/FileNumericFeature.php',
'CirrusSearch\\Query\\FileTypeFeature' => __DIR__ .
'/includes/Query/FileTypeFeature.php',
'CirrusSearch\\Query\\FullTextQueryBuilder' => __DIR__ .
'/includes/Query/FullTextQueryBuilder.php',
diff --git a/includes/Query/DeepcatFeature.php b/includes/Query/DeepcatFeature.php
new file mode 100644
index 0000000..ba019a0
--- /dev/null
+++ b/includes/Query/DeepcatFeature.php
@@ -0,0 +1,137 @@
+<?php
+namespace CirrusSearch\Query;
+
+use CirrusSearch\Search\SearchContext;
+use CirrusSearch\SearchConfig;
+use Config;
+use MediaWiki\Sparql\SparqlClient;
+use MediaWiki\Sparql\SparqlException;
+use Title;
+
+/**
+ * Filters by category or its subcategories. E.g. if category Vehicles includes Cars
+ * and Boats, then search for Vehicles would match pages in Vehicles, Cars and Boats.
+ *
+ * Syntax:
+ * deepcat:Vehicles
+ */
+class DeepcatFeature extends SimpleKeywordFeature {
+ /**
+ * Max lookup depth
+ * @var int
+ */
+ private $depth;
+ /**
+ * Max number of categories
+ * @var int
+ */
+ private $limit;
+ /**
+ * Category URL prefix for this wiki
+ * @var string
+ */
+ private $prefix;
+ /**
+ * @var SparqlClient
+ */
+ private $client;
+
+ /**
+ * @param Config $config
+ * @param SparqlClient $client
+ */
+ public function __construct( Config $config, SparqlClient $client ) {
+ $this->depth = (int)$config->get( 'CirrusSearchCategoryDepth' );
+ $this->limit = (int)$config->get( 'CirrusSearchCategoryMax' );
+ $this->prefix = $this->getCategoryPrefix();
+ $endpoint = $config->get( 'CirrusSearchCategoryEndpoint' );
+ if ( !empty( $endpoint ) ) {
+ $this->client = $client;
+ }
+ }
+
+ /**
+ * @return string[] The list of keywords this feature is supposed to match
+ */
+ protected function getKeywords() {
+ return [ 'deepcat', 'deepcategory' ];
+ }
+
+ /**
+ * Applies the detected keyword from the search term. May apply changes
+ * either to $context directly, or return a filter to be added.
+ *
+ * @param SearchContext $context
+ * @param string $key The keyword
+ * @param string $value The value attached to the keyword with quotes stripped and escaped
+ * quotes un-escaped.
+ * @param string $quotedValue The original value in the search string, including quotes if used
+ * @param bool $negated Is the search negated? Not used to generate the returned AbstractQuery,
+ * that will be negated as necessary. Used for any other building/context necessary.
+ * @return array Two element array, first an AbstractQuery or null to apply to the
+ * query. Second a boolean indicating if the quotedValue should be kept in the search
+ * string.
+ */
+ protected function doApply( SearchContext $context, $key, $value,
$quotedValue, $negated ) {
+ if ( empty( $this->client ) ) {
+ $context->addWarning(
'cirrussearch-feature-deepcat-endpoint' );
+ return [ null, false ];
+ }
+
+ try {
+ $categories = $this->fetchCategories( $value );
+ } catch( SparqlException $e ) {
+ $context->addWarning(
'cirrussearch-feature-deepcat-exception', $e->getMessage() );
+ $categories = [ $value ];
+ }
+
+ $filter = new \Elastica\Query\BoolQuery();
+ foreach ( $categories as $cat ) {
+ $filter->addShould( QueryHelper::matchPage(
'category.lowercase_keyword', $cat ) );
+ }
+
+ return [ $filter, false ];
+ }
+
+ /**
+ * Get URL prefix for full category URL for this wiki.
+ * @return bool|string
+ */
+ private function getCategoryPrefix() {
+ $title = Title::makeTitle( NS_CATEGORY, 'ZZ' );
+ $fullName = $title->getFullURL( '', false, PROTO_CANONICAL );
+ return substr( $fullName, 0, - 2 );
+ }
+
+ /**
+ * Get child categories using SPARQL service.
+ * @param string $rootCategory Category to start looking from
+ * @return string[] List of subcategories.
+ * Note that the list may be incomplete due to limitations of the service.
+ * @throws SparqlException
+ */
+ private function fetchCategories( $rootCategory ) {
+ /** @var SparqlClient $client */
+ $title = Title::makeTitle( NS_CATEGORY, $rootCategory );
+ $fullName = $title->getFullURL( '', false, PROTO_CANONICAL );
+ $query = <<<SPARQL
+SELECT ?out WHERE {
+ SERVICE mediawiki:categoryTree {
+ bd:serviceParam mediawiki:start <$fullName> .
+ bd:serviceParam mediawiki:direction "Reverse" .
+ bd:serviceParam mediawiki:depth {$this->depth} .
+ }
+} ORDER BY ASC(?depth)
+LIMIT {$this->limit}
+SPARQL;
+ $result = $this->client->query( $query );
+
+ $prefixLen = strlen( $this->prefix );
+ return array_map( function ( $row ) use ( $prefixLen ) {
+ // TODO: maybe we want to check the prefix is indeed the same?
+ // It should be but who knows...
+ return substr( $row['out'], $prefixLen );
+ }, $result );
+ }
+
+}
diff --git a/includes/Query/InCategoryFeature.php b/includes/Query/InCategoryFeature.php
index e364f86..7af035c 100644
--- a/includes/Query/InCategoryFeature.php
+++ b/includes/Query/InCategoryFeature.php
@@ -12,7 +12,7 @@
* must follow the syntax `id:<id>`.
*
* We emulate template syntax here as best as possible, so things in NS_MAIN
- * are prefixed with ":" and things in NS_TEMPATE don't have a prefix at all.
+ * are prefixed with ":" and things in NS_TEMPLATE don't have a prefix at all.
* Since we don't actually index templates like that, munge the query here.
*
* Examples:
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 51f2a19..1a5b2a8 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -307,6 +307,9 @@
new Query\ContentModelFeature(),
// subpageof keyword
new Query\SubPageOfFeature(),
+ // deepcat keyword
+ new Query\DeepcatFeature( $this->config,
+ MediaWikiServices::getInstance()->getService(
'CirrusCategoriesClient' ) ),
];
$extraFeatures = [];
diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php
new file mode 100644
index 0000000..b4e39b8
--- /dev/null
+++ b/includes/ServiceWiring.php
@@ -0,0 +1,16 @@
+<?php
+/**
+ * Services for the CirrusSearch extension
+ */
+
+use MediaWiki\MediaWikiServices;
+use MediaWiki\Sparql\SparqlClient;
+
+return [
+ // SPARQL client for deep category search
+ 'CirrusCategoriesClient' => function ( MediaWikiServices $services ) {
+ $config = $services->getMainConfig();
+ return new SparqlClient( $config->get(
'CirrusSearchCategoryEndpoint' ),
+ $services->getHttpRequestFactory() );
+ },
+];
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/405059
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I08f2c7dc10d205d3f190c34980ef37998b53cb32
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Smalyshev <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits