EBernhardson has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/312061

Change subject: Add a language based keyword filter
......................................................................

Add a language based keyword filter

Adds a new full-text search keyword, inlanguage, that limits the result
set to pages marked as being in a specific language. Such pages are most
commonly created with the Translate extension.

Change-Id: I15fc139531e3e6902ed64db915da1f8ef7910e99
---
M autoload.php
A includes/Query/LanguageFeature.php
M includes/Search/Filters.php
M includes/Searcher.php
4 files changed, 78 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/61/312061/1

diff --git a/autoload.php b/autoload.php
index e00d715..250ac5c 100644
--- a/autoload.php
+++ b/autoload.php
@@ -101,6 +101,7 @@
        'CirrusSearch\\Query\\InCategoryFeature' => __DIR__ . 
'/includes/Query/InCategoryFeature.php',
        'CirrusSearch\\Query\\InTitleFeature' => __DIR__ . 
'/includes/Query/InTitleFeature.php',
        'CirrusSearch\\Query\\KeywordFeature' => __DIR__ . 
'/includes/Query/KeywordFeature.php',
+       'CirrusSearch\\Query\\LanguageFeature' => __DIR__ . 
'/includes/Query/LanguageFeature.php',
        'CirrusSearch\\Query\\LinksToFeature' => __DIR__ . 
'/includes/Query/LinksToFeature.php',
        'CirrusSearch\\Query\\LocalFeature' => __DIR__ . 
'/includes/Query/LocalFeature.php',
        'CirrusSearch\\Query\\PreferRecentFeature' => __DIR__ . 
'/includes/Query/PreferRecentFeature.php',
diff --git a/includes/Query/LanguageFeature.php 
b/includes/Query/LanguageFeature.php
new file mode 100644
index 0000000..a5f39e0
--- /dev/null
+++ b/includes/Query/LanguageFeature.php
@@ -0,0 +1,50 @@
+<?php
+
+namespace CirrusSearch\Query;
+
+use CirrusSearch\Search\Filters;
+use CirrusSearch\Search\SearchContext;
+
+/**
+ * Filters the result set based on pages labeled with the provided language.
+ * More than one language can be specified with commas and they will be
+ * generated as an OR query.
+ *
+ * Examples:
+ *   inlanguage:en
+ *   inlanguage:fr,en
+ */
+class LanguageFeature extends SimpleKeywordFeature {
+       /**
+        * @return string
+        */
+       protected function getKeywordRegex() {
+               return 'inlanguage';
+       }
+
+       /**
+        * @param SearchContext $context
+        * @param string $key The keyword
+        * @param string $value The value attached to the keyword with quotes 
stripped
+        * @param string $quotedValue The original value in the search string, 
including quotes if used
+        * @param bool $negated Is the search negated? Not used to generate the 
returned AbstractQuery,
+        *  that will be negated as necessary. Used for any other 
building/context necessary.
+        * @return array Two element array, first an AbstractQuery or null to 
apply to the
+        *  query. Second a boolean indicating if the quotedValue should be 
kept in the search
+        *  string.
+        */
+       protected function doApply( SearchContext $context, $key, $value, 
$quotedValue, $negated ) {
+               $queries = [];
+
+               // Limit search to 20 languages. Arbitrarily chosen, but should 
be more
+               // than enough and some sort of limit has to be enforced.
+               $langs = array_slice( explode( ',', $value ), 0, 20 );
+               foreach ( $langs as $lang ) {
+                       $query = new \Elastica\Query\Match();
+                       $query->setFieldQuery( 'language', $lang );
+                       $queries[] = $query;
+               }
+
+               return [Filters::booleanOr( $queries, false ), false];
+       }
+}
diff --git a/includes/Search/Filters.php b/includes/Search/Filters.php
index ee10077..d7bbd58 100644
--- a/includes/Search/Filters.php
+++ b/includes/Search/Filters.php
@@ -5,6 +5,7 @@
 use Elastica;
 use Elastica\Query\AbstractQuery;
 use Elastica\Query\BoolQuery;
+use Elastica\Query\MatchAll;
 use GeoData\Coord;
 
 /**
@@ -27,6 +28,30 @@
  */
 class Filters {
        /**
+        * Turns a list of queries into a boolean OR, requiring only one
+        * of the provided queries to match.
+        *
+        * @param AbstractQuery[] $queries
+        * @param bool $matchAll When true (default) function never returns 
null,
+        *  when no queries are provided a MatchAll is returned.
+        * @return AbstractQuery|null The resulting OR query. Only returns null 
when
+        *  no queries are passed and $matchAll is false.
+        */
+       public static function booleanOr( array $queries, $matchAll = true ) {
+               if ( !$queries ) {
+                       return $matchAll ? new MatchAll() : null;
+               } elseif ( count( $queries ) === 1 ) {
+                       return reset( $queries );
+               } else {
+                       $bool = new BoolQuery();
+                       foreach ( $queries as $query ) {
+                               $bool->addShould( $query );
+                       }
+                       return $bool;
+               }
+       }
+
+       /**
         * Merges lists of include/exclude filters into a single filter that
         * Elasticsearch will execute efficiently.
         *
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 3b8389b..7610b71 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -331,6 +331,8 @@
                                new Query\SimpleInSourceFeature( $this->escaper 
),
                                // Handle intitle keyword
                                new Query\InTitleFeature( $this->escaper ),
+                               // inlanguage keyword
+                               new Query\LanguageFeature(),
                        ],
                        $builderSettings['settings']
                );

-- 
To view, visit https://gerrit.wikimedia.org/r/312061
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I15fc139531e3e6902ed64db915da1f8ef7910e99
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <ebernhard...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to