jenkins-bot has submitted this change and it was merged.

Change subject: Elasticsearch support
......................................................................


Elasticsearch support

Change-Id: Ie6eef11ebdbead62d60eef113879eccf45ec4aea
---
M Coord.php
M GeoData.body.php
M GeoData.php
M GeoDataHooks.php
M api/ApiQueryCoordinates.php
M api/ApiQueryGeoSearchDb.php
A api/ApiQueryGeoSearchElastic.php
M solrupdate.php
8 files changed, 357 insertions(+), 14 deletions(-)

Approvals:
  Chad: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/Coord.php b/Coord.php
index 3c619e3..8d556bf 100644
--- a/Coord.php
+++ b/Coord.php
@@ -13,7 +13,10 @@
                $type,
                $name,
                $country,
-               $region;
+               $region,
+
+               $pageId,
+               $distance;
 
        public function __construct( $lat, $lon, $globe = null ) {
                global $wgDefaultGlobe;
@@ -85,7 +88,19 @@
                return $row;
        }
 
-       public static $fieldMapping = array(
+       /**
+        * Exports the mapped fields of this object as a field name => value array.
+        * The exported field names are the ones reported by self::getFields().
+        * @return array
+        */
+       public function getAsArray() {
+               $names = self::getFields();
+               $values = array();
+               foreach ( $names as $name ) {
+                       $values[$name] = $this->{$name};
+               }
+               return $values;
+       }
+
+       private static $fieldMapping = array(
                'id' => 'gt_id',
                'lat' => 'gt_lat',
                'lon' => 'gt_lon',
@@ -97,4 +112,24 @@
                'country' => 'gt_country',
                'region' => 'gt_region',
        );
+
+       /**
+        * Returns the map of field names to their gt_* database column names
+        * @return array
+        */
+       public static function getFieldMapping() {
+               $mapping = self::$fieldMapping;
+               return $mapping;
+       }
+
+       /**
+        * Returns the field names (keys) of the field mapping.
+        * The list is built on first use and remembered for later calls.
+        * @return array
+        */
+       public static function getFields() {
+               static $cachedNames = null;
+               if ( $cachedNames ) {
+                       return $cachedNames;
+               }
+               $cachedNames = array_keys( self::$fieldMapping );
+               return $cachedNames;
+       }
+
+       /**
+        * Returns the database column names (values) of the field mapping.
+        * The list is built on first use and remembered for later calls.
+        * @return array
+        */
+       public static function getColumns() {
+               static $cachedColumns = null;
+               if ( $cachedColumns ) {
+                       return $cachedColumns;
+               }
+               $cachedColumns = array_values( self::$fieldMapping );
+               return $cachedColumns;
+       }
 }
diff --git a/GeoData.body.php b/GeoData.body.php
index d9288dd..5c088c7 100644
--- a/GeoData.body.php
+++ b/GeoData.body.php
@@ -44,7 +44,7 @@
        public static function getAllCoordinates( $pageId, $conds = array(), 
$dbType = DB_SLAVE ) {
                $db = wfGetDB( $dbType );
                $conds['gt_page_id'] = $pageId;
-               $res = $db->select( 'geo_tags', array_values( 
Coord::$fieldMapping ), $conds, __METHOD__ );
+               $res = $db->select( 'geo_tags', Coord::getColumns(), $conds, 
__METHOD__ );
                $coords = array();
                foreach ( $res as $row ) {
                        $coords[] = Coord::newFromRow( $row );
diff --git a/GeoData.php b/GeoData.php
index 24457ba..0b702a5 100644
--- a/GeoData.php
+++ b/GeoData.php
@@ -16,6 +16,7 @@
 $wgAutoloadClasses['ApiQueryCoordinates'] = "$dir/api/ApiQueryCoordinates.php";
 $wgAutoloadClasses['ApiQueryGeoSearch'] = "$dir/api/ApiQueryGeoSearch.php";
 $wgAutoloadClasses['ApiQueryGeoSearchDb'] = "$dir/api/ApiQueryGeoSearchDb.php";
+$wgAutoloadClasses['ApiQueryGeoSearchElastic'] = 
"$dir/api/ApiQueryGeoSearchElastic.php";
 $wgAutoloadClasses['ApiQueryGeoSearchSolr'] = 
"$dir/api/ApiQueryGeoSearchSolr.php";
 $wgAutoloadClasses['ApiQueryAllPages_GeoData'] = 
"$dir/api/ApiQueryAllPages_GeoData.php";
 $wgAutoloadClasses['ApiQueryCategoryMembers_GeoData'] = 
"$dir/api/ApiQueryCategoryMembers_GeoData.php";
@@ -48,6 +49,8 @@
 $wgHooks['LinksUpdate'][] = 'GeoDataHooks::onLinksUpdate';
 $wgHooks['FileUpload'][] = 'GeoDataHooks::onFileUpload';
 $wgHooks['OutputPageParserOutput'][] = 
'GeoDataHooks::onOutputPageParserOutput';
+$wgHooks['CirrusSearchMappingConfig'][] = 
'GeoDataHooks::onCirrusSearchMappingConfig';
+$wgHooks['CirrusSearchBuildDocumentParse'][] = 
'GeoDataHooks::onCirrusSearchBuildDocumentParse';
 
 // Use the proper search backend
 $wgExtensionFunctions[] = 'efInitGeoData';
@@ -180,7 +183,7 @@
 $wgGeoDataIndexGranularity = 10;
 
 /**
- * Which backend should be used by spatial searhces: 'db' or 'solr'
+ * Which backend should be used by spatial searches: 'db', 'solr' or 'elastic'
  */
 $wgGeoDataBackend = 'db';
 
@@ -228,3 +231,10 @@
  * Setting it to false or empty array will disable wgCoordinates.
  */
 $wgGeoDataInJS = array( 'lat', 'lon' );
+
+/**
+ * Enables the use of GeoData as a CirrusSearch plugin for indexing.
+ * This is separate from $wgGeoDataBackend: you can populate the Elasticsearch index
+ * while still searching with the old backend. However, if the backend is already
+ * set to 'elastic', GeoData always behaves as if this setting is true.
+ */
+$wgGeoDataUseCirrusSearch = false;
diff --git a/GeoDataHooks.php b/GeoDataHooks.php
index 5d6338f..e67b448 100644
--- a/GeoDataHooks.php
+++ b/GeoDataHooks.php
@@ -81,7 +81,7 @@
 
                wfProfileIn( __METHOD__ );
                $dbw = wfGetDB( DB_MASTER );
-               if ( $wgGeoDataBackend != 'db' ) {
+               if ( $wgGeoDataBackend == 'solr' ) {
                        $res = $dbw->select( 'geo_tags', 'gt_id', array( 
'gt_page_id' => $id ), __METHOD__ );
                        $killlist = array();
                        foreach ( $res as $row ) {
@@ -282,4 +282,70 @@
 
                return true;
        }
+
+       /**
+        * CirrusSearchMappingConfig hook handler
+        * Registers a nested 'coordinates' property in the CirrusSearch/Elasticsearch
+        * schema, unless neither $wgGeoDataUseCirrusSearch nor the 'elastic' backend is set
+        *
+        * @param array $config Mapping configuration, modified in place
+        *
+        * @return bool Always true
+        */
+       public static function onCirrusSearchMappingConfig( array &$config ) {
+               global $wgGeoDataUseCirrusSearch, $wgGeoDataBackend;
+
+               $indexingEnabled = $wgGeoDataUseCirrusSearch || $wgGeoDataBackend == 'elastic';
+               if ( $indexingEnabled ) {
+                       // Shared definition for the string fields declared as 'not_analyzed'
+                       $exactString = array( 'type' => 'string', 'index' => 'not_analyzed' );
+                       $properties = array(
+                               'coord' => array( 'type' => 'geo_point' ),
+                               'globe' => $exactString,
+                               'primary' => array( 'type' => 'boolean' ),
+                               'dim' => array( 'type' => 'float' ),
+                               'type' => $exactString,
+                               'name' => array( 'type' => 'string', 'index' => 'no' ),
+                               'country' => $exactString,
+                               'region' => $exactString,
+                       );
+                       $config['properties']['coordinates'] = array(
+                               'type' => 'nested',
+                               'properties' => $properties,
+                       );
+               }
+               return true;
+       }
+
+       /**
+        * CirrusSearchBuildDocumentParse hook handler
+        * Copies the coordinates found in the parser output into the 'coordinates'
+        * field of the search document. Does nothing when neither
+        * $wgGeoDataUseCirrusSearch nor the 'elastic' backend is set, or when the
+        * parser output carries no geoData.
+        *
+        * @param Elastica\Document $doc
+        * @param Title $title
+        * @param Content $content
+        * @param ParserOutput $parserOutput
+        * @return bool Always true
+        */
+       public static function onCirrusSearchBuildDocumentParse( Elastica\Document $doc,
+               Title $title,
+               Content $content,
+               ParserOutput $parserOutput )
+       {
+               global $wgGeoDataUseCirrusSearch, $wgGeoDataBackend;
+
+               $indexingEnabled = $wgGeoDataUseCirrusSearch || $wgGeoDataBackend == 'elastic';
+               if ( !$indexingEnabled ) {
+                       return true;
+               }
+               if ( !isset( $parserOutput->geoData ) ) {
+                       return true;
+               }
+
+               wfProfileIn( __METHOD__ );
+               $entries = array();
+               /** @var Coord $coord */
+               foreach ( $parserOutput->geoData->getAll() as $coord ) {
+                       $entry = $coord->getAsArray();
+                       // The id is not indexed; lat/lon are stored as a single 'coord' point
+                       unset( $entry['id'], $entry['lat'], $entry['lon'] );
+                       $entry['coord'] = array( 'lat' => $coord->lat, 'lon' => $coord->lon );
+                       $entries[] = $entry;
+               }
+               $doc->set( 'coordinates', $entries );
+               wfProfileOut( __METHOD__ );
+               return true;
+       }
 }
diff --git a/api/ApiQueryCoordinates.php b/api/ApiQueryCoordinates.php
index 807befd..0609cc0 100644
--- a/api/ApiQueryCoordinates.php
+++ b/api/ApiQueryCoordinates.php
@@ -18,9 +18,10 @@
                $params = $this->extractRequestParams();
                $this->addTables( 'geo_tags' );
                $this->addFields( array( 'gt_id', 'gt_page_id', 'gt_lat', 
'gt_lon', 'gt_primary' ) );
+               $mapping = Coord::getFieldMapping();
                foreach( $params['prop'] as $prop ) {
-                       if ( isset( Coord::$fieldMapping[$prop] ) ) {
-                               $this->addFields( Coord::$fieldMapping[$prop] );
+                       if ( isset( $mapping[$prop] ) ) {
+                               $this->addFields( $mapping[$prop] );
                        }
                }
                $this->addWhereFld( 'gt_page_id', array_keys( $titles ) );
@@ -59,8 +60,8 @@
                                $vals['primary'] = '';
                        }
                        foreach( $params['prop'] as $prop ) {
-                               if ( isset( Coord::$fieldMapping[$prop] ) && 
isset( $row->{Coord::$fieldMapping[$prop]} ) ) {
-                                       $field = Coord::$fieldMapping[$prop];
+                               if ( isset( $mapping[$prop] ) && isset( 
$row->{$mapping[$prop]} ) ) {
+                                       $field = $mapping[$prop];
                                        $vals[$prop] = $row->$field;
                                }
                        }
diff --git a/api/ApiQueryGeoSearchDb.php b/api/ApiQueryGeoSearchDb.php
index d21ce65..309d097 100644
--- a/api/ApiQueryGeoSearchDb.php
+++ b/api/ApiQueryGeoSearchDb.php
@@ -18,9 +18,10 @@
 
                $this->addTables( 'geo_tags' );
                $this->addFields( array( 'gt_lat', 'gt_lon', 'gt_primary' ) );
+               $mapping = Coord::getFieldMapping();
                foreach( $params['prop'] as $prop ) {
-                       if ( isset( Coord::$fieldMapping[$prop] ) ) {
-                               $this->addFields( Coord::$fieldMapping[$prop] );
+                       if ( isset( $mapping[$prop] ) ) {
+                               $this->addFields( $mapping[$prop] );
                        }
                }
                $this->addWhereFld( 'gt_globe', $params['globe'] );
@@ -71,8 +72,8 @@
                                        $vals['primary'] = '';
                                }
                                foreach( $params['prop'] as $prop ) {
-                                       if ( isset( Coord::$fieldMapping[$prop] 
) && isset( $row->{Coord::$fieldMapping[$prop]} ) ) {
-                                               $field = 
Coord::$fieldMapping[$prop];
+                                       if ( isset( $mapping[$prop] ) && isset( 
$row->{$mapping[$prop]} ) ) {
+                                               $field = $mapping[$prop];
                                                // Don't output default globe
                                                if ( !( $prop === 'globe' && 
$row->$field === $wgDefaultGlobe ) ) {
                                                        $vals[$prop] = 
$row->$field;
diff --git a/api/ApiQueryGeoSearchElastic.php b/api/ApiQueryGeoSearchElastic.php
new file mode 100644
index 0000000..12fc219
--- /dev/null
+++ b/api/ApiQueryGeoSearchElastic.php
@@ -0,0 +1,230 @@
+<?php
+
+class ApiQueryGeoSearchElastic extends ApiQueryGeoSearch {
+       /** @var array Request parameters of the current run(), also read by filterCoord() */
+       private $params;
+
+       public function __construct( $query, $moduleName ) {
+               parent::__construct( $query, $moduleName );
+       }
+
+       /**
+        * Runs the search against Elasticsearch, re-checks every coordinate of every
+        * hit locally, then loads the matching pages from the page table and either
+        * outputs them (nearest first) or passes them to the result page set.
+        *
+        * @param ApiPageSet $resultPageSet
+        * @throws MWException If Elastica reports an error
+        */
+       protected function run( $resultPageSet = null ) {
+               global $wgDefaultGlobe;
+
+               wfProfileIn( __METHOD__ );
+               parent::run( $resultPageSet );
+               $this->resetQueryParams();
+
+               try {
+                       $params = $this->params = $this->extractRequestParams();
+
+                       $bools = new Elastica\Filter\Bool();
+                       if ( $this->idToExclude ) {
+                               $bools->addMustNot(
+                                       new Elastica\Filter\Term( array( '_id' => $this->idToExclude ) )
+                               );
+                       }
+                       // Only Earth is supported
+                       $bools->addMust( new Elastica\Filter\Term( array( 'coordinates.globe' => 'earth' ) ) );
+                       if ( isset( $params['maxdim'] ) ) {
+                               $bools->addMust( new Elastica\Filter\Range(
+                                       'coordinates.dim',
+                                       array( 'to' => $params['maxdim'] ) )
+                               );
+                       }
+
+                       $primary = $params['primary'];
+                       if ( $primary !== 'all' ) {
+                               $bools->addMust( new Elastica\Filter\Term(
+                                       array( 'coordinates.primary' => intval( $primary === 'primary' ) )
+                               ) );
+                       }
+
+                       $query = new Elastica\Query();
+                       // filterCoord() checks globe and dim of every coordinate of a hit, so
+                       // both are fetched even when the client did not list them in 'prop'.
+                       // array_values() keeps the list sequentially indexed after array_unique().
+                       $sourceProps = array_values( array_unique(
+                               array_merge( array( 'coord', 'primary', 'globe', 'dim' ), $params['prop'] )
+                       ) );
+                       $fields = array_map(
+                               function( $prop ) { return "coordinates.$prop"; },
+                               $sourceProps
+                       );
+                       $query->setParam( '_source', $fields );
+                       $filter = new Elastica\Filter\BoolAnd();
+                       $filter->addFilter( $bools );
+                       $filter->addFilter( new Elastica\Filter\GeoDistance( 'coordinates.coord',
+                               array( 'lat' => $this->lat, 'lon' => $this->lon ),
+                               $this->radius . 'm'
+                       ) );
+                       $nested = new Elastica\Filter\Nested();
+                       $nested->setPath( 'coordinates' )
+                               ->setFilter( $filter );
+                       // A namespace filter is only needed when not all namespaces were requested
+                       if ( count( $params['namespace'] ) < count( MWNamespace::getValidNamespaces() ) ) {
+                               $outerFilter = new Elastica\Filter\Bool();
+                               $outerFilter->addMust( $nested );
+                               $outerFilter->addMust(
+                                       new Elastica\Filter\Terms( 'namespace', $params['namespace'] )
+                               );
+                               $query->setFilter( $outerFilter );
+                       } else {
+                               $query->setFilter( $nested );
+                       }
+
+                       $query->addSort(
+                               array(
+                                       '_geo_distance' => array(
+                                               'coordinates.coord' => array( 'lat' => $this->lat, 'lon' => $this->lon ),
+                                               'order' => 'asc',
+                                               'unit' => 'm'
+                                       )
+                               )
+                       );
+                       $query->setSize( $params['limit'] );
+
+                       $pageType = CirrusSearch\Connection::getPageType( wfWikiID() );
+                       $search = $pageType->createSearch( $query );
+
+                       wfProfileIn( __METHOD__ . '-request' );
+                       try {
+                               $resultSet = $search->search();
+                       } catch ( Elastica\Exception\ExceptionInterface $e ) {
+                               // Close the inner profiling section before the outer handler runs
+                               wfProfileOut( __METHOD__ . '-request' );
+                               throw $e;
+                       }
+                       wfProfileOut( __METHOD__ . '-request' );
+
+                       $data = $resultSet->getResponse()->getData();
+
+                       if ( !isset( $data['hits']['hits'] ) ) {
+                               $this->dieDebug( __METHOD__, 'Unexpected result set returned by Elasticsearch' );
+                       }
+                       $ids = array();
+                       $coordinates = array();
+                       foreach ( $data['hits']['hits'] as $page ) {
+                               if ( !isset( $page['_source']['coordinates'] ) ) {
+                                       // Nothing to extract from this hit
+                                       continue;
+                               }
+                               $id = $page['_id'];
+                               foreach ( $page['_source']['coordinates'] as $coordArray ) {
+                                       $coord = $this->makeCoord( $coordArray );
+                                       if ( !$this->filterCoord( $coord ) ) {
+                                               continue;
+                                       }
+                                       $coord->pageId = $id;
+                                       $coordinates[] = $coord;
+                                       $ids[$id] = true;
+                               }
+                       }
+                       // Nearest coordinates first
+                       usort( $coordinates, function( $coord1, $coord2 ) {
+                               if ( $coord1->distance == $coord2->distance ) {
+                                       return 0;
+                               }
+                               return ( $coord1->distance < $coord2->distance ) ? -1 : 1;
+                       } );
+
+                       if ( !count( $coordinates ) ) {
+                               wfProfileOut( __METHOD__ );
+                               return; // No results, no point in doing anything else
+                       }
+                       $this->addWhere( array( 'page_id' => array_keys( $ids ) ) );
+                       $this->addTables( 'page' );
+                       $this->addFields( array( 'page_id', 'page_title', 'page_namespace' ) );
+
+                       wfProfileIn( __METHOD__ . '-sql' );
+                       $res = $this->select( __METHOD__ );
+                       wfProfileOut( __METHOD__ . '-sql' );
+
+                       if ( is_null( $resultPageSet ) ) {
+                               $titles = array();
+                               foreach ( $res as $row ) {
+                                       $titles[$row->page_id] = Title::newFromRow( $row );
+                               }
+
+                               // Number of results that may still be output
+                               $remaining = $params['limit'];
+                               $result = $this->getResult();
+
+                               foreach ( $coordinates as $coord ) {
+                                       if ( $remaining <= 0 ) {
+                                               break;
+                                       }
+                                       $id = $coord->pageId;
+                                       if ( !isset( $titles[$id] ) ) {
+                                               // No row for this page came back from the page table:
+                                               // skip it without using up one of the result slots
+                                               continue;
+                                       }
+                                       $title = $titles[$id];
+                                       $vals = array(
+                                               'pageid' => intval( $coord->pageId ),
+                                               'ns' => intval( $title->getNamespace() ),
+                                               'title' => $title->getPrefixedText(),
+                                               'lat' => floatval( $coord->lat ),
+                                               'lon' => floatval( $coord->lon ),
+                                               'dist' => round( $coord->distance, 1 ),
+                                       );
+
+                                       if ( $coord->primary ) {
+                                               $vals['primary'] = '';
+                                       }
+                                       foreach( $params['prop'] as $prop ) {
+                                               // Don't output default globe
+                                               if ( !( $prop === 'globe' && $coord->$prop === $wgDefaultGlobe ) ) {
+                                                       $vals[$prop] = $coord->$prop;
+                                               }
+                                       }
+                                       $fit = $result->addValue(
+                                               array( 'query', $this->getModuleName() ),
+                                               null,
+                                               $vals
+                                       );
+                                       if ( !$fit ) {
+                                               break;
+                                       }
+                                       $remaining--;
+                               }
+                       } else {
+                               $resultPageSet->populateFromQueryResult( $this->getDB(), $res );
+                       }
+               } catch ( Elastica\Exception\ExceptionInterface $e ) {
+                       // Keep profiling balanced when the search backend fails
+                       wfProfileOut( __METHOD__ );
+                       throw new MWException( get_class( $e )
+                               . " at {$e->getFile()}, line {$e->getLine()}: {$e->getMessage()}", 0, $e
+                       );
+               }
+               wfProfileOut( __METHOD__ );
+       }
+
+       /**
+        * Creates a Coord class instance from an array returned by search
+        * and sets its distance from the search center
+        *
+        * @param array $hit One entry of a hit's 'coordinates' list
+        *
+        * @return Coord
+        */
+       private function makeCoord( array $hit ) {
+               $lat = $hit['coord']['lat'];
+               $lon = $hit['coord']['lon'];
+               $coord = new Coord( $lat, $lon );
+               // Copy over whichever known fields the search backend returned
+               foreach ( Coord::getFields() as $field ) {
+                       if ( isset( $hit[$field] ) ) {
+                               $coord->$field = $hit[$field];
+                       }
+               }
+               $coord->distance =
+                       GeoDataMath::distance( $this->lat, $this->lon, $coord->lat, $coord->lon );
+               return $coord;
+       }
+
+       /**
+        * Checks whether given coordinates fall within the requested limits
+        * (radius, globe, maximum dimension, primary/secondary)
+        * @param Coord $coord
+        *
+        * @return bool If false, these coordinates should be discarded
+        */
+       private function filterCoord( Coord $coord ) {
+               if ( $coord->distance > $this->radius ) {
+                       return false;
+               }
+               // Only one globe is supported for search, this is future-proof
+               if ( $coord->globe != $this->params['globe'] ) {
+                       return false;
+               }
+               if ( isset( $this->params['maxdim'] ) && $coord->dim > $this->params['maxdim'] ) {
+                       return false;
+               }
+               $primary = $this->params['primary'];
+               if ( ( $primary == 'primary' && !$coord->primary )
+                       || ( $primary == 'secondary' && $coord->primary ) )
+               {
+                       return false;
+               }
+               return true;
+       }
+}
diff --git a/solrupdate.php b/solrupdate.php
index 7ffe779..8bf4716 100644
--- a/solrupdate.php
+++ b/solrupdate.php
@@ -98,7 +98,7 @@
 
                $solr = SolrGeoData::newClient( 'master' );
 
-               $fields = Coord::$fieldMapping;
+               $fields = Coord::getFieldMapping();
                $fields['page_id'] = 'gt_page_id';
 
                if ( $cutoffTags ) {

-- 
To view, visit https://gerrit.wikimedia.org/r/115413
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie6eef11ebdbead62d60eef113879eccf45ec4aea
Gerrit-PatchSet: 9
Gerrit-Project: mediawiki/extensions/GeoData
Gerrit-Branch: master
Gerrit-Owner: MaxSem <maxsem.w...@gmail.com>
Gerrit-Reviewer: Chad <ch...@wikimedia.org>
Gerrit-Reviewer: Manybubbles <never...@wikimedia.org>
Gerrit-Reviewer: MaxSem <maxsem.w...@gmail.com>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to