Manybubbles has uploaded a new change for review. https://gerrit.wikimedia.org/r/153649
Change subject: Add cirrusdump action for helpful debugging ...................................................................... Add cirrusdump action for helpful debugging Going to /wiki/Page_Name?action=cirrusdump spits out what we have in the Elasticsearch index for the page. Two reasons this is cool: 1. Saves a few steps when we're debugging. 2. Publicly exposes a safe part of the infrastructure. It's cool for people to look at and allows external users to review what we have, either to suggest things or find bugs or just learn from it. Bug: 69442 Change-Id: Ic613c2d1a77198a3d2aa7035fd93257f886a2c10 --- M CirrusSearch.php A includes/Dump.php M includes/Searcher.php A tests/browser/features/dump_action.feature M tests/browser/features/step_definitions/general_steps.rb M tests/browser/features/step_definitions/search_steps.rb A tests/browser/features/support/pages/dump_page.rb 7 files changed, 94 insertions(+), 4 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch refs/changes/49/153649/1 diff --git a/CirrusSearch.php b/CirrusSearch.php index ea99039..7aeadee 100644 --- a/CirrusSearch.php +++ b/CirrusSearch.php @@ -410,6 +410,7 @@ $wgAutoloadClasses['CirrusSearch\BuildDocument\ParseBuilder'] = $buildDocument . 'Builder.php'; $wgAutoloadClasses['CirrusSearch\BuildDocument\RedirectsAndIncomingLinks'] = $buildDocument . 'RedirectsAndIncomingLinks.php'; $wgAutoloadClasses['CirrusSearch\Connection'] = $includes . 'Connection.php'; +$wgAutoloadClasses['CirrusSearch\Dump'] = $includes . 'Dump.php'; $wgAutoloadClasses['CirrusSearch\ElasticsearchIntermediary'] = $includes . 'ElasticsearchIntermediary.php'; $wgAutoloadClasses['CirrusSearch\ForceSearchIndex'] = __DIR__ . '/maintenance/forceSearchIndex.php'; $wgAutoloadClasses['CirrusSearch\Hooks'] = $includes . 
'Hooks.php'; @@ -487,6 +488,11 @@ $wgJobClasses[ 'cirrusSearchOtherIndex' ] = 'CirrusSearch\Job\OtherIndex'; /** + * Actions + */ +$wgActions[ 'cirrusdump' ] = 'CirrusSearch\Dump'; + +/** * Jenkins configuration required to get all the browser tests passing cleanly. * Note that it is only hooked for browser tests. */ diff --git a/includes/Dump.php b/includes/Dump.php new file mode 100644 index 0000000..0b934be --- /dev/null +++ b/includes/Dump.php @@ -0,0 +1,64 @@ +<?php + +namespace CirrusSearch; + +use \FormlessAction; + +/** + * action=cirrusdump handler. Dumps contents of Elasticsearch indexes for the + * page. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + */ +class Dump extends FormlessAction { + public function onView() { + // Disable regular results + $this->getOutput()->disable(); + + $response = $this->getRequest()->response(); + $response->header( 'Content-type: application/json; charset=UTF-8' ); + + $searcher = new Searcher( 0, 0, false, $this->getUser() ); + $id = $this->getTitle()->getArticleID(); + $esSources = $searcher->get( array( $id ), true ); + if ( !$esSources->isOk() ) { + // Exception has been logged + echo '{}'; + return null; + } + $esSources = $esSources->getValue(); + + $result = array(); + foreach ( $esSources as $esSource ) { + $result[ $esSource->getIndex() ] = $esSource->getData(); + } + echo json_encode( $result ); + + return null; + } + + public function getName() { + return 'cirrusdump'; + } + + public function requiresWrite() { + return false; + } + + public function requiresUnblock() { + return false; + } +} diff --git a/includes/Searcher.php b/includes/Searcher.php index 9261ddf..db8fb8d 100644 --- a/includes/Searcher.php +++ b/includes/Searcher.php @@ -739,18 +739,18 @@ * Get the page with $id. Note that the result is a status containing _all_ pages found. * It is possible to find more then one page if the page is in multiple indexes. 
* @param array(int) $pageIds page id - * @param array(string) $fields fields to fetch + * @param array(string)|true|false $sourceFiltering source filtering to apply * @return Status containing pages found, containing an empty array if not found, * or an error if there was an error */ - public function get( $pageIds, $fields ) { + public function get( $pageIds, $sourceFiltering ) { $profiler = new ProfileSection( __METHOD__ ); $indexType = $this->pickIndexTypeFromNamespaces(); $searcher = $this; $indexBaseName = $this->indexBaseName; $getWork = new PoolCounterWorkViaCallback( 'CirrusSearch-Search', "_elasticsearch", array( - 'doWork' => function() use ( $searcher, $pageIds, $fields, $indexType, $indexBaseName ) { + 'doWork' => function() use ( $searcher, $pageIds, $sourceFiltering, $indexType, $indexBaseName ) { try { global $wgCirrusSearchClientSideSearchTimeout; $searcher->start( "get of $indexType." . implode( ', ', $pageIds ) ); @@ -758,7 +758,7 @@ Connection::setTimeout( $wgCirrusSearchClientSideSearchTimeout[ 'default' ] ); $pageType = Connection::getPageType( $indexBaseName, $indexType ); $query = new \Elastica\Query( new \Elastica\Query\Ids( null, $pageIds ) ); - $query->setParam( 'fields', $fields ); + $query->setParam( '_source', $sourceFiltering ); $resultSet = $pageType->search( $query, array( 'search_type' => 'query_and_fetch' ) ); return $searcher->success( $resultSet->getResults() ); } catch ( \Elastica\Exception\NotFoundException $e ) { diff --git a/tests/browser/features/dump_action.feature b/tests/browser/features/dump_action.feature new file mode 100644 index 0000000..27373cc --- /dev/null +++ b/tests/browser/features/dump_action.feature @@ -0,0 +1,7 @@ +@clean @phantomjs @dump_action +Feature: Cirrus dump + Scenario: Can dump pages + When I dump the cirrus data for Main Page + Then the page text contains Main Page + And the page text contains template + And the page text contains namespace diff --git 
a/tests/browser/features/step_definitions/general_steps.rb b/tests/browser/features/step_definitions/general_steps.rb index 6231c5a..4b90e33 100644 --- a/tests/browser/features/step_definitions/general_steps.rb +++ b/tests/browser/features/step_definitions/general_steps.rb @@ -1,3 +1,6 @@ Given(/wait ([0-9]+) seconds/) do |seconds| sleep(Integer(seconds)) end +Then(/the page text contains .*/) do |text| + browser.html should include text +end diff --git a/tests/browser/features/step_definitions/search_steps.rb b/tests/browser/features/step_definitions/search_steps.rb index 78c73e9..1dd0738 100644 --- a/tests/browser/features/step_definitions/search_steps.rb +++ b/tests/browser/features/step_definitions/search_steps.rb @@ -84,6 +84,9 @@ end end end +When(/^I dump the cirrus data for (.+)$/) do |title| + visit(CirrusDumpPage, using_params: {page_name: title}) +end Then(/^suggestions should( not)? appear$/) do |not_appear| if not_appear then diff --git a/tests/browser/features/support/pages/dump_page.rb b/tests/browser/features/support/pages/dump_page.rb new file mode 100644 index 0000000..dc3702d --- /dev/null +++ b/tests/browser/features/support/pages/dump_page.rb @@ -0,0 +1,7 @@ +require "cgi" + +class CirrusDumpPage + include PageObject + + page_url URL.url("../w/index.php?title=<%=CGI.escape(params[:page_name])%>&action=cirrusdump") +end -- To view, visit https://gerrit.wikimedia.org/r/153649 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ic613c2d1a77198a3d2aa7035fd93257f886a2c10 Gerrit-PatchSet: 1 Gerrit-Project: mediawiki/extensions/CirrusSearch Gerrit-Branch: master Gerrit-Owner: Manybubbles <[email protected]> _______________________________________________ MediaWiki-commits mailing list [email protected] https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
