Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/153649

Change subject: Add cirrusdump action for helpful debugging
......................................................................

Add cirrusdump action for helpful debugging

Going to /wiki/Page_Name?action=cirrusdump spits out what we have in the
Elasticsearch index for the page.  Two reasons this is cool:
1.  Saves a few steps when we're debugging.
2.  Publicly exposes a safe part of the infrastructure.  It's cool for
people to look at, and it allows external users to review what we have, either
to suggest things, find bugs, or just learn from it.

Bug: 69442
Change-Id: Ic613c2d1a77198a3d2aa7035fd93257f886a2c10
---
M CirrusSearch.php
A includes/Dump.php
M includes/Searcher.php
A tests/browser/features/dump_action.feature
M tests/browser/features/step_definitions/general_steps.rb
M tests/browser/features/step_definitions/search_steps.rb
A tests/browser/features/support/pages/dump_page.rb
7 files changed, 94 insertions(+), 4 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/CirrusSearch 
refs/changes/49/153649/1

diff --git a/CirrusSearch.php b/CirrusSearch.php
index ea99039..7aeadee 100644
--- a/CirrusSearch.php
+++ b/CirrusSearch.php
@@ -410,6 +410,7 @@
 $wgAutoloadClasses['CirrusSearch\BuildDocument\ParseBuilder'] = $buildDocument 
. 'Builder.php';
 $wgAutoloadClasses['CirrusSearch\BuildDocument\RedirectsAndIncomingLinks'] = 
$buildDocument . 'RedirectsAndIncomingLinks.php';
 $wgAutoloadClasses['CirrusSearch\Connection'] = $includes . 'Connection.php';
+$wgAutoloadClasses['CirrusSearch\Dump'] = $includes . 'Dump.php';
 $wgAutoloadClasses['CirrusSearch\ElasticsearchIntermediary'] = $includes . 
'ElasticsearchIntermediary.php';
 $wgAutoloadClasses['CirrusSearch\ForceSearchIndex'] = __DIR__ . 
'/maintenance/forceSearchIndex.php';
 $wgAutoloadClasses['CirrusSearch\Hooks'] = $includes . 'Hooks.php';
@@ -487,6 +488,11 @@
 $wgJobClasses[ 'cirrusSearchOtherIndex' ] = 'CirrusSearch\Job\OtherIndex';
 
 /**
+ * Actions
+ */
+$wgActions[ 'cirrusdump' ] = 'CirrusSearch\Dump';
+
+/**
  * Jenkins configuration required to get all the browser tests passing cleanly.
  * Note that it is only hooked for browser tests.
  */
diff --git a/includes/Dump.php b/includes/Dump.php
new file mode 100644
index 0000000..0b934be
--- /dev/null
+++ b/includes/Dump.php
@@ -0,0 +1,64 @@
+<?php
+
+namespace CirrusSearch;
+
+use \FormlessAction;
+
+/**
+ * action=cirrusdump handler.  Dumps contents of Elasticsearch indexes for the
+ * page.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ */
+class Dump extends FormlessAction {
+       public function onView() {
+               // Turn off the normal page output; the JSON below is echoed directly
+               $this->getOutput()->disable();
+
+               $response = $this->getRequest()->response();
+               $response->header( 'Content-type: application/json; 
charset=UTF-8' );
+
+               $searcher = new Searcher( 0, 0, false, $this->getUser() );
+               $id = $this->getTitle()->getArticleID();
+               $esSources = $searcher->get( array( $id ), true );
+               if ( !$esSources->isOk() ) {
+                       // Exception has been logged; respond with an empty JSON object
+                       echo '{}';
+                       return null;
+               }
+               $esSources = $esSources->getValue();
+
+               $result = array(); // maps index name => document data
+               foreach ( $esSources as $esSource ) {
+                       $result[ $esSource->getIndex() ] = $esSource->getData();
+               }
+               echo json_encode( $result );
+
+               return null;
+       }
+
+       public function getName() {
+               return 'cirrusdump';
+       }
+
+       public function requiresWrite() {
+               return false;
+       }
+
+       public function requiresUnblock() {
+               return false;
+       }
+}
diff --git a/includes/Searcher.php b/includes/Searcher.php
index 9261ddf..db8fb8d 100644
--- a/includes/Searcher.php
+++ b/includes/Searcher.php
@@ -739,18 +739,18 @@
         * Get the page with $id.  Note that the result is a status containing 
_all_ pages found.
         * It is possible to find more then one page if the page is in multiple 
indexes.
         * @param array(int) $pageIds page id
-        * @param array(string) $fields fields to fetch
+        * @param array(string)|true|false $sourceFiltering source filtering to 
apply
         * @return Status containing pages found, containing an empty array if 
not found,
         *    or an error if there was an error
         */
-       public function get( $pageIds, $fields ) {
+       public function get( $pageIds, $sourceFiltering ) {
                $profiler = new ProfileSection( __METHOD__ );
 
                $indexType = $this->pickIndexTypeFromNamespaces();
                $searcher = $this;
                $indexBaseName = $this->indexBaseName;
                $getWork = new PoolCounterWorkViaCallback( 
'CirrusSearch-Search', "_elasticsearch", array(
-                       'doWork' => function() use ( $searcher, $pageIds, 
$fields, $indexType, $indexBaseName ) {
+                       'doWork' => function() use ( $searcher, $pageIds, 
$sourceFiltering, $indexType, $indexBaseName ) {
                                try {
                                        global 
$wgCirrusSearchClientSideSearchTimeout;
                                        $searcher->start( "get of $indexType." 
. implode( ', ', $pageIds ) );
@@ -758,7 +758,7 @@
                                        Connection::setTimeout( 
$wgCirrusSearchClientSideSearchTimeout[ 'default' ] );
                                        $pageType = Connection::getPageType( 
$indexBaseName, $indexType );
                                        $query = new \Elastica\Query( new 
\Elastica\Query\Ids( null, $pageIds ) );
-                                       $query->setParam( 'fields', $fields );
+                                       $query->setParam( '_source', 
$sourceFiltering );
                                        $resultSet = $pageType->search( $query, 
array( 'search_type' => 'query_and_fetch' ) );
                                        return $searcher->success( 
$resultSet->getResults() );
                                } catch ( \Elastica\Exception\NotFoundException 
$e ) {
diff --git a/tests/browser/features/dump_action.feature 
b/tests/browser/features/dump_action.feature
new file mode 100644
index 0000000..27373cc
--- /dev/null
+++ b/tests/browser/features/dump_action.feature
@@ -0,0 +1,7 @@
+@clean @phantomjs @dump_action
+Feature: Cirrus dump
+  Scenario: Can dump pages
+    When I dump the cirrus data for Main Page
+    Then the page text contains Main Page
+    And the page text contains template
+    And the page text contains namespace
diff --git a/tests/browser/features/step_definitions/general_steps.rb 
b/tests/browser/features/step_definitions/general_steps.rb
index 6231c5a..4b90e33 100644
--- a/tests/browser/features/step_definitions/general_steps.rb
+++ b/tests/browser/features/step_definitions/general_steps.rb
@@ -1,3 +1,6 @@
 Given(/wait ([0-9]+) seconds/) do |seconds|
   sleep(Integer(seconds))
 end
+Then(/the page text contains .*/) do |text|
+       browser.html should include text
+end
diff --git a/tests/browser/features/step_definitions/search_steps.rb 
b/tests/browser/features/step_definitions/search_steps.rb
index 78c73e9..1dd0738 100644
--- a/tests/browser/features/step_definitions/search_steps.rb
+++ b/tests/browser/features/step_definitions/search_steps.rb
@@ -84,6 +84,9 @@
     end
   end
 end
+When(/^I dump the cirrus data for (.+)$/) do |title|
+  visit(CirrusDumpPage, using_params: {page_name: title})
+end
 
 Then(/^suggestions should( not)? appear$/) do |not_appear|
   if not_appear then
diff --git a/tests/browser/features/support/pages/dump_page.rb 
b/tests/browser/features/support/pages/dump_page.rb
new file mode 100644
index 0000000..dc3702d
--- /dev/null
+++ b/tests/browser/features/support/pages/dump_page.rb
@@ -0,0 +1,7 @@
+require "cgi"
+
+class CirrusDumpPage
+  include PageObject
+
+  page_url 
URL.url("../w/index.php?title=<%=CGI.escape(params[:page_name])%>&action=cirrusdump")
+end

-- 
To view, visit https://gerrit.wikimedia.org/r/153649
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic613c2d1a77198a3d2aa7035fd93257f886a2c10
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/CirrusSearch
Gerrit-Branch: master
Gerrit-Owner: Manybubbles <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to