jenkins-bot has submitted this change and it was merged. Change subject: Elasticsearch support ......................................................................
Elasticsearch support Change-Id: Ie6eef11ebdbead62d60eef113879eccf45ec4aea --- M Coord.php M GeoData.body.php M GeoData.php M GeoDataHooks.php M api/ApiQueryCoordinates.php M api/ApiQueryGeoSearchDb.php A api/ApiQueryGeoSearchElastic.php M solrupdate.php 8 files changed, 357 insertions(+), 14 deletions(-) Approvals: Chad: Looks good to me, approved jenkins-bot: Verified diff --git a/Coord.php b/Coord.php index 3c619e3..8d556bf 100644 --- a/Coord.php +++ b/Coord.php @@ -13,7 +13,10 @@ $type, $name, $country, - $region; + $region, + + $pageId, + $distance; public function __construct( $lat, $lon, $globe = null ) { global $wgDefaultGlobe; @@ -85,7 +88,19 @@ return $row; } - public static $fieldMapping = array( + /** + * Returns these coordinates as an associative array + * @return array + */ + public function getAsArray() { + $result = array(); + foreach ( self::getFields() as $field ) { + $result[$field] = $this->$field; + } + return $result; + } + + private static $fieldMapping = array( 'id' => 'gt_id', 'lat' => 'gt_lat', 'lon' => 'gt_lon', @@ -97,4 +112,24 @@ 'country' => 'gt_country', 'region' => 'gt_region', ); + + public static function getFieldMapping() { + return self::$fieldMapping; + } + + public static function getFields() { + static $fields = null; + if ( !$fields ) { + $fields = array_keys( self::$fieldMapping ); + } + return $fields; + } + + public static function getColumns() { + static $columns = null; + if ( !$columns ) { + $columns = array_values( self::$fieldMapping ); + } + return $columns; + } } diff --git a/GeoData.body.php b/GeoData.body.php index d9288dd..5c088c7 100644 --- a/GeoData.body.php +++ b/GeoData.body.php @@ -44,7 +44,7 @@ public static function getAllCoordinates( $pageId, $conds = array(), $dbType = DB_SLAVE ) { $db = wfGetDB( $dbType ); $conds['gt_page_id'] = $pageId; - $res = $db->select( 'geo_tags', array_values( Coord::$fieldMapping ), $conds, __METHOD__ ); + $res = $db->select( 'geo_tags', Coord::getColumns(), $conds, __METHOD__ ); $coords = array(); foreach ( $res as $row ) { $coords[] = Coord::newFromRow( $row ); diff --git a/GeoData.php b/GeoData.php index 24457ba..0b702a5 100644 --- a/GeoData.php +++ b/GeoData.php @@ -16,6 +16,7 @@ $wgAutoloadClasses['ApiQueryCoordinates'] = "$dir/api/ApiQueryCoordinates.php"; $wgAutoloadClasses['ApiQueryGeoSearch'] = "$dir/api/ApiQueryGeoSearch.php"; $wgAutoloadClasses['ApiQueryGeoSearchDb'] = "$dir/api/ApiQueryGeoSearchDb.php"; +$wgAutoloadClasses['ApiQueryGeoSearchElastic'] = "$dir/api/ApiQueryGeoSearchElastic.php"; $wgAutoloadClasses['ApiQueryGeoSearchSolr'] = "$dir/api/ApiQueryGeoSearchSolr.php"; $wgAutoloadClasses['ApiQueryAllPages_GeoData'] = "$dir/api/ApiQueryAllPages_GeoData.php"; $wgAutoloadClasses['ApiQueryCategoryMembers_GeoData'] = "$dir/api/ApiQueryCategoryMembers_GeoData.php"; @@ -48,6 +49,8 @@ $wgHooks['LinksUpdate'][] = 'GeoDataHooks::onLinksUpdate'; $wgHooks['FileUpload'][] = 'GeoDataHooks::onFileUpload'; $wgHooks['OutputPageParserOutput'][] = 'GeoDataHooks::onOutputPageParserOutput'; +$wgHooks['CirrusSearchMappingConfig'][] = 'GeoDataHooks::onCirrusSearchMappingConfig'; +$wgHooks['CirrusSearchBuildDocumentParse'][] = 'GeoDataHooks::onCirrusSearchBuildDocumentParse'; // Use the proper search backend $wgExtensionFunctions[] = 'efInitGeoData'; @@ -180,7 +183,7 @@ $wgGeoDataIndexGranularity = 10; /** - * Which backend should be used by spatial searhces: 'db' or 'solr' + * Which backend should be used by spatial searhces: 'db', 'solr' or 'elastic' */ $wgGeoDataBackend = 'db'; @@ -228,3 +231,10 @@ * Setting it to false or empty array will disable wgCoordinates. */ $wgGeoDataInJS = array( 'lat', 'lon' ); + +/** + * Enables the use of GeoData as a CirrusSearch plugin for indexing. + * This is separate from $wgGeoDataBackend: you could be filling Elasticsearch index and using old search + * meanwhile. However, if backend is already set to 'elastic', GeoData always behaves as if it's true + */ +$wgGeoDataUseCirrusSearch = false; diff --git a/GeoDataHooks.php b/GeoDataHooks.php index 5d6338f..e67b448 100644 --- a/GeoDataHooks.php +++ b/GeoDataHooks.php @@ -81,7 +81,7 @@ wfProfileIn( __METHOD__ ); $dbw = wfGetDB( DB_MASTER ); - if ( $wgGeoDataBackend != 'db' ) { + if ( $wgGeoDataBackend == 'solr' ) { $res = $dbw->select( 'geo_tags', 'gt_id', array( 'gt_page_id' => $id ), __METHOD__ ); $killlist = array(); foreach ( $res as $row ) { @@ -282,4 +282,70 @@ return true; } + + /** + * CirrusSearchMappingConfig hook handler + * Adds our stuff to CirrusSearch/Elasticsearch schema + * + * @param array $config + * + * @return bool + */ + public static function onCirrusSearchMappingConfig( array &$config ) { + global $wgGeoDataUseCirrusSearch, $wgGeoDataBackend; + if ( !$wgGeoDataUseCirrusSearch && $wgGeoDataBackend != 'elastic' ) { + return true; + } + $config['properties']['coordinates'] = array( + 'type' => 'nested', + 'properties' => array( + 'coord' => array( 'type' => 'geo_point' ), + 'globe' => array( 'type' => 'string', 'index' => 'not_analyzed' ), + 'primary' => array( 'type' => 'boolean' ), + 'dim' => array( 'type' => 'float' ), + 'type' => array( 'type' => 'string', 'index' => 'not_analyzed' ), + 'name' => array( 'type' => 'string', 'index' => 'no' ), + 'country' => array( 'type' => 'string', 'index' => 'not_analyzed' ), + 'region' => array( 'type' => 'string', 'index' => 'not_analyzed' ), + ), + ); + return true; + } + + /** + * CirrusSearchBuildDocumentParse hook handler + * + * @param Elastica\Document $doc + * @param Title $title + * @param Content $content + * @param ParserOutput $parserOutput + * @return bool + */ + public static function onCirrusSearchBuildDocumentParse( Elastica\Document $doc, + Title $title, + Content $content, + ParserOutput $parserOutput ) + { + global $wgGeoDataUseCirrusSearch, $wgGeoDataBackend; + if ( !( $wgGeoDataUseCirrusSearch || $wgGeoDataBackend == 'elastic' ) + || !isset( $parserOutput->geoData ) ) + { + return true; + } + + wfProfileIn( __METHOD__ ); + $coords = array(); + /** @var Coord $coord */ + foreach ( $parserOutput->geoData->getAll() as $coord ) { + $arr = $coord->getAsArray(); + $arr['coord'] = array( 'lat' => $coord->lat, 'lon' => $coord->lon ); + unset( $arr['id'] ); + unset( $arr['lat'] ); + unset( $arr['lon'] ); + $coords[] = $arr; + } + $doc->set( 'coordinates', $coords ); + wfProfileOut( __METHOD__ ); + return true; + } } diff --git a/api/ApiQueryCoordinates.php b/api/ApiQueryCoordinates.php index 807befd..0609cc0 100644 --- a/api/ApiQueryCoordinates.php +++ b/api/ApiQueryCoordinates.php @@ -18,9 +18,10 @@ $params = $this->extractRequestParams(); $this->addTables( 'geo_tags' ); $this->addFields( array( 'gt_id', 'gt_page_id', 'gt_lat', 'gt_lon', 'gt_primary' ) ); + $mapping = Coord::getFieldMapping(); foreach( $params['prop'] as $prop ) { - if ( isset( Coord::$fieldMapping[$prop] ) ) { - $this->addFields( Coord::$fieldMapping[$prop] ); + if ( isset( $mapping[$prop] ) ) { + $this->addFields( $mapping[$prop] ); } } $this->addWhereFld( 'gt_page_id', array_keys( $titles ) ); @@ -59,8 +60,8 @@ $vals['primary'] = ''; } foreach( $params['prop'] as $prop ) { - if ( isset( Coord::$fieldMapping[$prop] ) && isset( $row->{Coord::$fieldMapping[$prop]} ) ) { - $field = Coord::$fieldMapping[$prop]; + if ( isset( $mapping[$prop] ) && isset( $row->{$mapping[$prop]} ) ) { + $field = $mapping[$prop]; $vals[$prop] = $row->$field; } } diff --git a/api/ApiQueryGeoSearchDb.php b/api/ApiQueryGeoSearchDb.php index d21ce65..309d097 100644 --- a/api/ApiQueryGeoSearchDb.php +++ b/api/ApiQueryGeoSearchDb.php @@ -18,9 +18,10 @@ $this->addTables( 'geo_tags' ); $this->addFields( array( 'gt_lat', 'gt_lon', 'gt_primary' ) ); + $mapping = Coord::getFieldMapping(); foreach( $params['prop'] as $prop ) { - if ( isset( Coord::$fieldMapping[$prop] ) ) { - $this->addFields( Coord::$fieldMapping[$prop] ); + if ( isset( $mapping[$prop] ) ) { + $this->addFields( $mapping[$prop] ); } } $this->addWhereFld( 'gt_globe', $params['globe'] ); @@ -71,8 +72,8 @@ $vals['primary'] = ''; } foreach( $params['prop'] as $prop ) { - if ( isset( Coord::$fieldMapping[$prop] ) && isset( $row->{Coord::$fieldMapping[$prop]} ) ) { - $field = Coord::$fieldMapping[$prop]; + if ( isset( $mapping[$prop] ) && isset( $row->{$mapping[$prop]} ) ) { + $field = $mapping[$prop]; // Don't output default globe if ( !( $prop === 'globe' && $row->$field === $wgDefaultGlobe ) ) { $vals[$prop] = $row->$field; diff --git a/api/ApiQueryGeoSearchElastic.php b/api/ApiQueryGeoSearchElastic.php new file mode 100644 index 0000000..12fc219 --- /dev/null +++ b/api/ApiQueryGeoSearchElastic.php @@ -0,0 +1,230 @@ +<?php + +class ApiQueryGeoSearchElastic extends ApiQueryGeoSearch { + private $params; + + public function __construct( $query, $moduleName ) { + parent::__construct( $query, $moduleName ); + } + + /** + * @param ApiPageSet $resultPageSet + */ + protected function run( $resultPageSet = null ) { + global $wgDefaultGlobe; + + wfProfileIn( __METHOD__ ); + parent::run( $resultPageSet ); + $this->resetQueryParams(); + + try { + $params = $this->params = $this->extractRequestParams(); + + $bools = new Elastica\Filter\Bool(); + if ( $this->idToExclude ) { + $bools->addMustNot( + new Elastica\Filter\Term( array( '_id' => $this->idToExclude ) ) + ); + } + // Only Earth is supported + $bools->addMust( new Elastica\Filter\Term( array( 'coordinates.globe' => 'earth' ) ) ); + if ( isset( $params['maxdim'] ) ) { + $bools->addMust( new Elastica\Filter\Range( + 'coordinates.dim', + array( 'to' => $params['maxdim'] ) ) + ); + } + + $primary = $params['primary']; + if ( $primary !== 'all' ) { + $bools->addMust( new Elastica\Filter\Term( + array( 'coordinates.primary' => intval( $primary === 'primary' ) ) + ) ); + } + + $query = new Elastica\Query(); + $fields = array_map( + function( $prop ) { return "coordinates.$prop"; }, + array_merge( array( 'coord', 'primary' ), $params['prop'] ) + ); + $query->setParam( '_source', $fields ); + $filter = new Elastica\Filter\BoolAnd(); + $filter->addFilter( $bools ); + $filter->addFilter( new Elastica\Filter\GeoDistance( 'coordinates.coord', + array( 'lat' => $this->lat, 'lon' => $this->lon ), + $this->radius . 'm' + ) ); + $nested = new Elastica\Filter\Nested(); + $nested->setPath( 'coordinates' ) + ->setFilter( $filter ); + if ( count( $params['namespace'] ) < count( MWNamespace::getValidNamespaces() ) ) { + $outerFilter = new Elastica\Filter\Bool(); + $outerFilter->addMust( $nested ); + $outerFilter->addMust( + new Elastica\Filter\Terms( 'namespace', $params['namespace'] ) + ); + $query->setFilter( $outerFilter ); + } else { + $query->setFilter( $nested ); + } + + $query->addSort( + array( + '_geo_distance' => array( + 'coordinates.coord' => array( 'lat' => $this->lat, 'lon' => $this->lon ), + 'order' => 'asc', + 'unit' => 'm' + ) + ) + ); + $query->setSize( $params['limit'] ); + + $pageType = CirrusSearch\Connection::getPageType( wfWikiID() ); + $search = $pageType->createSearch( $query ); + + wfProfileIn( __METHOD__ . '-request' ); + $resultSet = $search->search(); + wfProfileOut( __METHOD__ . '-request' ); + + $data = $resultSet->getResponse()->getData(); + + if ( !isset( $data['hits']['hits'] ) ) { + $this->dieDebug( __METHOD__, 'Unexpected result set returned by Elasticsearch' ); + } + $ids = array(); + $coordinates = array(); + foreach ( $data['hits']['hits'] as $page ) { + $id = $page['_id']; + foreach ( $page['_source']['coordinates'] as $coordArray ) { + $coord = $this->makeCoord( $coordArray ); + if ( !$this->filterCoord( $coord ) ) { + continue; + } + $coord->pageId = $id; + $coordinates[] = $coord; + $ids[$id] = true; + } + } + usort( $coordinates, function( $coord1, $coord2 ) { + if ( $coord1->distance == $coord2->distance ) { + return 0; + } + return ( $coord1->distance < $coord2->distance ) ? -1 : 1; + } ); + + if ( !count( $coordinates ) ) { + wfProfileOut( __METHOD__ ); + return; // No results, no point in doing anything else + } + $this->addWhere( array( 'page_id' => array_keys( $ids ) ) ); + $this->addTables( 'page' ); + $this->addFields( array( 'page_id', 'page_title', 'page_namespace' ) ); + + wfProfileIn( __METHOD__ . '-sql' ); + $res = $this->select( __METHOD__ ); + wfProfileOut( __METHOD__ . '-sql' ); + + + if ( is_null( $resultPageSet ) ) { + $titles = array(); + foreach ( $res as $row ) { + $titles[$row->page_id] = Title::newFromRow( $row ); + } + + $limit = $params['limit']; + $result = $this->getResult(); + + foreach ( $coordinates as $coord ) { + if ( !$limit-- ) { + break; + } + $id = $coord->pageId; + if ( !isset( $titles[$id] ) ) { + continue; + } + $title = $titles[$id]; + $vals = array( + 'pageid' => intval( $coord->pageId ), + 'ns' => intval( $title->getNamespace() ), + 'title' => $title->getPrefixedText(), + 'lat' => floatval( $coord->lat ), + 'lon' => floatval( $coord->lon ), + 'dist' => round( $coord->distance, 1 ), + ); + + if ( $coord->primary ) { + $vals['primary'] = ''; + } + foreach( $params['prop'] as $prop ) { + // Don't output default globe + if ( !( $prop === 'globe' && $coord->$prop === $wgDefaultGlobe ) ) { + $vals[$prop] = $coord->$prop; + } + } + $fit = $result->addValue( + array( 'query', $this->getModuleName() ), + null, + $vals + ); + if ( !$fit ) { + break; + } + } + } else { + $resultPageSet->populateFromQueryResult( $this->getDB(), $res ); + } + } catch ( Elastica\Exception\ExceptionInterface $e ) { + throw new MWException( get_class( $e ) + . " at {$e->getFile()}, line {$e->getLine()}: {$e->getMessage()}", 0, $e + ); + } + wfProfileOut( __METHOD__ ); + } + + /** + * Creates a Coord class instance from an array returned by search + * + * @param array $hit: Search hit + * + * @return Coord + */ + private function makeCoord( array $hit ) { + $lat = $hit['coord']['lat']; + $lon = $hit['coord']['lon']; + $coord = new Coord( $lat, $lon ); + foreach ( Coord::getFields() as $field ) { + if ( isset( $hit[$field] ) ) { + $coord->$field = $hit[$field]; + } + } + $coord->distance = + GeoDataMath::distance( $this->lat, $this->lon, $coord->lat, $coord->lon ); + return $coord; + } + + /** + * Checks whether given coordinates fall within the requested limits + * @param Coord $coord + * + * @return bool: If false, these coordinates should be discarded + */ + private function filterCoord( Coord $coord ) { + if ( $coord->distance > $this->radius ) { + return false; + } + // Only one globe is supported for search, this is future-proof + if ( $coord->globe != $this->params['globe'] ) { + return false; + } + if ( isset( $this->params['maxdim'] ) && $coord->dim > $this->params['maxdim'] ) { + return false; + } + $primary = $this->params['primary']; + if ( ( $primary == 'primary' && !$coord->primary ) + || ( $primary == 'secondary' && $coord->primary ) ) + { + return false; + } + return true; + } +} diff --git a/solrupdate.php b/solrupdate.php index 7ffe779..8bf4716 100644 --- a/solrupdate.php +++ b/solrupdate.php @@ -98,7 +98,7 @@ $solr = SolrGeoData::newClient( 'master' ); - $fields = Coord::$fieldMapping; + $fields = Coord::getFieldMapping(); $fields['page_id'] = 'gt_page_id'; if ( $cutoffTags ) { -- To view, visit https://gerrit.wikimedia.org/r/115413 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: Ie6eef11ebdbead62d60eef113879eccf45ec4aea Gerrit-PatchSet: 9 Gerrit-Project: mediawiki/extensions/GeoData Gerrit-Branch: master Gerrit-Owner: MaxSem <maxsem.w...@gmail.com> Gerrit-Reviewer: Chad <ch...@wikimedia.org> Gerrit-Reviewer: Manybubbles <never...@wikimedia.org> Gerrit-Reviewer: MaxSem <maxsem.w...@gmail.com> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits