Yaron Koren has submitted this change and it was merged.

Change subject: Create CLI utility for ReplaceText jobs
......................................................................


Create CLI utility for ReplaceText jobs

This creates a new class (ReplaceTextSearch) to move the search logic out
of the special page so that it is available for CLI access.  Also creates
replaceAll.php, which can be used for search-and-replace tasks that need to
be automated.

Change-Id: I8ccec61f570f33043d8a8d00c52b40acd9d6894a
---
M ReplaceText.php
A ReplaceTextSearch.php
M SpecialReplaceText.php
A replaceAll.php
4 files changed, 438 insertions(+), 58 deletions(-)

Approvals:
  Yaron Koren: Checked; Looks good to me, approved



diff --git a/ReplaceText.php b/ReplaceText.php
index e80ba2d..0ec617e 100644
--- a/ReplaceText.php
+++ b/ReplaceText.php
@@ -47,6 +47,7 @@
 $wgSpecialPageGroups['ReplaceText'] = 'wiki';
 $wgAutoloadClasses['ReplaceText'] = $rtgIP . 'SpecialReplaceText.php';
 $wgAutoloadClasses['ReplaceTextJob'] = $rtgIP . 'ReplaceTextJob.php';
+$wgAutoloadClasses['ReplaceTextSearch'] = $rtgIP . 'ReplaceTextSearch.php';
 
 /**
  * This function should really go into a "ReplaceText_body.php" file.
diff --git a/ReplaceTextSearch.php b/ReplaceTextSearch.php
new file mode 100644
index 0000000..73a9f62
--- /dev/null
+++ b/ReplaceTextSearch.php
@@ -0,0 +1,59 @@
+<?php
+
+/**
+ * Database search helpers shared by Special:ReplaceText and command-line tools.
+ */
+class ReplaceTextSearch {
+       /**
+        * Select the current revision text of every page that matches $search.
+        *
+        * @param string $search Text (or regular expression) to look for
+        * @param array|int $namespaces Namespace index(es) used for the page_namespace condition
+        * @param string|null $category Restrict to pages in this category when non-empty
+        * @param string|null $prefix Restrict to page titles starting with this when non-empty
+        * @param bool $use_regex Treat $search as a regular expression
+        * @return mixed Whatever $dbr->select() returns; rows carry page_id,
+        *   page_namespace, page_title and old_text, ordered by namespace and title
+        */
+       public static function doSearchQuery( $search, $namespaces, $category, $prefix, $use_regex = false ) {
+               $dbr = wfGetDB( DB_SLAVE );
+               $tables = array( 'page', 'revision', 'text' );
+               $vars = array( 'page_id', 'page_namespace', 'page_title', 'old_text' );
+               if ( $use_regex ) {
+                       $comparisonCond = self::regexCond( $dbr, 'old_text', $search );
+               } else {
+                       $any = $dbr->anyString();
+                       $comparisonCond = 'old_text ' . $dbr->buildLike( $any, $search, $any );
+               }
+               $conds = array(
+                       $comparisonCond,
+                       'page_namespace' => $namespaces,
+                       // Only look at the latest revision of each page.
+                       'rev_id = page_latest',
+                       'rev_text_id = old_id'
+               );
+
+               self::categoryCondition( $category, $tables, $conds );
+               self::prefixCondition( $prefix, $conds );
+               $sort = array( 'ORDER BY' => 'page_namespace, page_title' );
+
+               return $dbr->select( $tables, $vars, $conds, __METHOD__, $sort );
+       }
+
+       /**
+        * Add the join and condition that restrict the search to one category.
+        * Does nothing when $category is empty.
+        *
+        * @param string|null $category Category name
+        * @param array &$tables Table list, extended with 'categorylinks'
+        * @param array &$conds Condition list, extended with the category filter
+        */
+       static protected function categoryCondition( $category, &$tables, &$conds ) {
+               if ( strval( $category ) === '' ) {
+                       return;
+               }
+
+               // Title::newFromText() yields null for an invalid name; fall back to
+               // the raw string (as prefixCondition() does) instead of calling a
+               // method on null.
+               $title = Title::newFromText( $category );
+               if ( !is_null( $title ) ) {
+                       $category = $title->getDbKey();
+               }
+               $tables[] = 'categorylinks';
+               $conds[] = 'page_id = cl_from';
+               $conds['cl_to'] = $category;
+       }
+
+       /**
+        * Add a condition that restricts the search to titles starting with $prefix.
+        * Does nothing when $prefix is empty.
+        *
+        * @param string|null $prefix Title prefix
+        * @param array &$conds Condition list, extended with the LIKE filter
+        */
+       static protected function prefixCondition( $prefix, &$conds ) {
+               if ( strval( $prefix ) === '' ) {
+                       return;
+               }
+
+               $dbr = wfGetDB( DB_SLAVE );
+               $title = Title::newFromText( $prefix );
+               if ( !is_null( $title ) ) {
+                       $prefix = $title->getDbKey();
+               }
+               $any = $dbr->anyString();
+               $conds[] = 'page_title ' . $dbr->buildLike( $prefix, $any );
+       }
+
+       /**
+        * Build a "column matches regex" SQL fragment for the given connection.
+        *
+        * @param object $dbr Database connection
+        * @param string $column Column name
+        * @param string $regex Regular expression, quoted via $dbr->addQuotes()
+        * @return string SQL condition using '~' on DatabasePostgres and REGEXP otherwise
+        */
+       static private function regexCond( $dbr, $column, $regex ) {
+               if ( $dbr instanceof DatabasePostgres ) {
+                       $op = '~';
+               } else {
+                       $op = 'REGEXP';
+               }
+               return "$column $op " . $dbr->addQuotes( $regex );
+       }
+}
\ No newline at end of file
diff --git a/SpecialReplaceText.php b/SpecialReplaceText.php
index 9499712..8b70dcf 100644
--- a/SpecialReplaceText.php
+++ b/SpecialReplaceText.php
@@ -128,7 +128,7 @@
 
                        // if user is replacing text within pages...
                        if ( $this->edit_pages ) {
-                               $res = $this->doSearchQuery(
+                               $res = ReplaceTextSearch::doSearchQuery(
                                        $this->target,
                                        $this->selected_namespaces,
                                        $this->category,
@@ -209,7 +209,7 @@
                                if ( $this->replacement === '' ) {
                                        $warning_msg = 
$this->msg('replacetext_blankwarning')->text();
                                } elseif ( count( $titles_for_edit ) > 0 ) {
-                                       $res = $this->doSearchQuery( 
$this->replacement, $this->selected_namespaces, $this->category, $this->prefix, 
$this->use_regex );
+                                       $res = 
ReplaceTextSearch::doSearchQuery( $this->replacement, 
$this->selected_namespaces, $this->category, $this->prefix, $this->use_regex );
                                        $count = $res->numRows();
                                        if ( $count > 0 ) {
                                                $warning_msg = $this->msg( 
'replacetext_warning' )->numParams( $count )
@@ -594,61 +594,5 @@
                $sort = array( 'ORDER BY' => 'page_namespace, page_title' );
 
                return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort 
);
-       }
-
-       function doSearchQuery( $search, $namespaces, $category, $prefix, 
$use_regex = false ) {
-               $dbr = wfGetDB( DB_SLAVE );
-               $tables = array( 'page', 'revision', 'text' );
-               $vars = array( 'page_id', 'page_namespace', 'page_title', 
'old_text' );
-               if ( $use_regex ) {
-                       $comparisonCond = $this->regexCond( $dbr, 'old_text', 
$search );
-               } else {
-                       $any = $dbr->anyString();
-                       $comparisonCond = 'old_text ' . $dbr->buildLike( $any, 
$search, $any );
-               }
-               $conds = array(
-                       $comparisonCond,
-                       'page_namespace' => $namespaces,
-                       'rev_id = page_latest',
-                       'rev_text_id = old_id'
-               );
-
-               $this->categoryCondition( $category, $tables, $conds );
-               $this->prefixCondition( $prefix, $conds );
-               $sort = array( 'ORDER BY' => 'page_namespace, page_title' );
-
-               return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort 
);
-       }
-
-       protected function categoryCondition( $category, &$tables, &$conds ) {
-               if ( strval( $category ) !== '' ) {
-                       $category = Title::newFromText( $category )->getDbKey();
-                       $tables[] = 'categorylinks';
-                       $conds[] = 'page_id = cl_from';
-                       $conds['cl_to'] = $category;
-               }
-       }
-
-       protected function prefixCondition( $prefix, &$conds ) {
-               if ( strval( $prefix ) === '' ) {
-                       return;
-               }
-
-               $dbr = wfGetDB( DB_SLAVE );
-               $title = Title::newFromText( $prefix );
-               if ( !is_null( $title ) ) {
-                       $prefix = $title->getDbKey();
-               }
-               $any = $dbr->anyString();
-               $conds[] = 'page_title ' . $dbr->buildLike( $prefix, $any );
-       }
-
-       private function regexCond( $dbr, $column, $regex ) {
-               if ( $dbr instanceof DatabasePostgres ) {
-                       $op = '~';
-               } else {
-                       $op = 'REGEXP';
-               }
-               return "$column $op " . $dbr->addQuotes( $regex );
        }
 }
diff --git a/replaceAll.php b/replaceAll.php
new file mode 100755
index 0000000..e869f7f
--- /dev/null
+++ b/replaceAll.php
@@ -0,0 +1,376 @@
+#!/usr/bin/php
+<?php
+/**
+ * Insert jobs into the jobqueue to replace text bits.
+ * Or execute immediately... your choice.
+ *
+ * Copyright © 2014 Mark A. Hershberger <[email protected]>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * PHP version 5
+ *
+ * @file
+ * @category Maintenance
+ * @package  ReplaceText
+ * @author   Mark A. Hershberger <[email protected]>
+ * @license  GPLv3 http://www.gnu.org/copyleft/gpl.html
+ * @link     https://www.mediawiki.org/wiki/Extension:Replace_Text
+ *
+ */
+
+// Locate the MediaWiki installation: honour MW_INSTALL_PATH first, then the
+// current directory (when the script is started from the MediaWiki root), and
+// finally the conventional extensions/ReplaceText location of this file.
+$IP = getenv( 'MW_INSTALL_PATH' );
+if ( $IP === false ) {
+       $IP = realpath( '.' );
+       if ( $IP === false || !file_exists( "$IP/maintenance/Maintenance.php" ) ) {
+               // Two levels up from extensions/ReplaceText is the MediaWiki root.
+               $IP = dirname( dirname( __DIR__ ) );
+       }
+}
+require_once "$IP/maintenance/Maintenance.php";
+
+/**
+ * Maintenance script that replaces text in wiki pages, taking the target and
+ * replacement from the command line or from a tab-separated replacements file.
+ *
+ * @ingroup Maintenance
+ * @SuppressWarnings(StaticAccess)
+ * @SuppressWarnings(LongVariable)
+ */
+class ReplaceText extends Maintenance {
+       protected $user;
+       protected $target;
+       protected $replacement;
+       protected $summaryMsg;
+       protected $namespaces;
+       protected $category;
+       protected $prefix;
+       protected $useRegex;
+       protected $titles;
+       protected $defaultContinue;
+
+       /**
+        * Declare the positional arguments and the options this script accepts.
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "CLI utility to replace text wherever it is " .
+                       "found in the wiki.";
+
+               $this->addArg( "target", "Target text to find.", false );
+               $this->addArg( "replace", "Replacement text.", false );
+
+               $this->addOption( "dry-run", "Only find the texts, don't replace.",
+                       false, false, 'n' );
+               $this->addOption( "regex", "This is a regex (false).",
+                       false, false, 'r' );
+               $this->addOption( "user", "The user to attribute this to (uid 1).",
+                       false, true, 'u' );
+               $this->addOption( "yes", "Skip all prompts with an assumed 'yes'.",
+                       false, false, 'y' );
+               $this->addOption( "summary", "Alternate edit summary. (%r is where to " .
+                       "place the replacement text, %f the text to look for.)",
+                       false, true, 's' );
+               // --ns carries a value, so it has to be declared as taking an argument;
+               // otherwise "--ns Talk" leaves "Talk" behind as a positional argument.
+               $this->addOption( "ns", "Comma separated namespaces to search in. " .
+                       "(Main)", false, true );
+               $this->addOption( "replacements", "File containing the list of replacements to " .
+                       "be made.  Fields in the file are tab-separated.  See --show-file-format " .
+                       "for more information.",
+                       false, true, "f" );
+               $this->addOption( "show-file-format", "Show a description of the file format to " .
+                       "use with --replacements.", false, false );
+               $this->addOption( "debug", "Display replacements being made.", false, false );
+
+               $this->addOption( "listns", "List out the namespaces on this wiki.",
+                       false, false );
+       }
+
+       /**
+        * Resolve the --user option (numeric id or user name, default id 1) to a
+        * User object; aborts the script when that is not possible.
+        *
+        * @return User
+        */
+       protected function getUser() {
+               $userReplacing = $this->getOption( "user", 1 );
+
+               $user = is_numeric( $userReplacing ) ?
+                       User::newFromId( $userReplacing ) :
+                       User::newFromName( $userReplacing );
+
+               // The lookup may yield a non-object for an unusable name; instanceof
+               // copes with that (get_class() does not) and also accepts subclasses.
+               if ( !( $user instanceof User ) ) {
+                       $this->error(
+                               "Couldn't translate '$userReplacing' to a user.", true
+                       );
+               }
+
+               return $user;
+       }
+
+       /**
+        * Read the search target from the first positional argument; aborts the
+        * script when it is missing or empty.
+        *
+        * @return array Single-element list holding the target text
+        */
+       protected function getTarget() {
+               $ret = $this->getArg( 0 );
+               // Compare explicitly so that a literal "0" is accepted as a target.
+               if ( $ret === null || $ret === false || $ret === '' ) {
+                       $this->error( "You have to specify a target.", true );
+               }
+               return array( $ret );
+       }
+
+       /**
+        * Read the replacement text from the second positional argument; aborts
+        * the script when it is missing or empty.
+        *
+        * @return array Single-element list holding the replacement text
+        */
+       protected function getReplacement() {
+               $ret = $this->getArg( 1 );
+               // Compare explicitly so that a literal "0" is accepted as replacement.
+               if ( $ret === null || $ret === false || $ret === '' ) {
+                       $this->error( "You have to specify replacement text.", true );
+               }
+               return array( $ret );
+       }
+
+       /**
+        * Load target/replacement pairs from the file named by --replacements.
+        *
+        * Each line is split on tabs: field 1 is the target, field 2 the
+        * replacement, and the mere presence of a third field marks the pair as
+        * a regular expression. Lines without a tab are skipped. Loading a file
+        * also switches the script to "continue without prompting".
+        *
+        * @return bool False when --replacements was not given, true otherwise
+        * @throws MWException When the file cannot be read or opened
+        */
+       protected function getReplacements() {
+               $file = $this->getOption( "replacements" );
+               if ( !$file ) {
+                       return false;
+               }
+
+               if ( !is_readable( $file ) ) {
+                       throw new MWException( "File does not exist or is not readable: $file\n" );
+               }
+
+               $handle = fopen( $file, "r" );
+               if ( $handle === false ) {
+                       throw new MWException( "Trouble opening file: $file\n" );
+               }
+
+               $this->defaultContinue = true;
+               // Start from empty lists so a file without any usable line still
+               // leaves iterable properties behind.
+               $this->target = array();
+               $this->replacement = array();
+               $this->useRegex = array();
+               while ( ( $line = fgets( $handle ) ) !== false ) {
+                       // fgets() keeps the line terminator; drop it so it does not end
+                       // up inside the replacement text.
+                       $field = explode( "\t", rtrim( $line, "\r\n" ) );
+                       if ( !isset( $field[1] ) ) {
+                               continue;
+                       }
+
+                       $this->target[] = $field[0];
+                       $this->replacement[] = $field[1];
+                       $this->useRegex[] = isset( $field[2] );
+               }
+               fclose( $handle );
+               return true;
+       }
+
+       /**
+        * Tell whether prompts should be answered with "yes" automatically.
+        * The answer is taken from --yes on first use and remembered afterwards.
+        *
+        * @return bool
+        */
+       protected function shouldContinueByDefault() {
+               if ( is_bool( $this->defaultContinue ) ) {
+                       return $this->defaultContinue;
+               }
+
+               $this->defaultContinue = (bool)$this->getOption( "yes" );
+               return $this->defaultContinue;
+       }
+
+       /**
+        * Build the edit summary for one target/replacement pair.
+        *
+        * With --summary, %f is replaced by the target and %r by the replacement;
+        * otherwise the 'replacetext_editsummary' message is used.
+        *
+        * @param string|null $target Text searched for; defaults to the first configured target
+        * @param string|null $replacement Text inserted; defaults to the first configured replacement
+        * @return string
+        */
+       protected function getSummary( $target = null, $replacement = null ) {
+               // $this->target and $this->replacement hold lists; pick one scalar
+               // value each so that neither str_replace() nor the message receives
+               // an array (which would render as "Array").
+               if ( $target === null ) {
+                       $first = array_slice( (array)$this->target, 0, 1 );
+                       $target = $first ? $first[0] : '';
+               }
+               if ( $replacement === null ) {
+                       $first = array_slice( (array)$this->replacement, 0, 1 );
+                       $replacement = $first ? $first[0] : '';
+               }
+
+               $custom = $this->getOption( "summary" );
+               if ( $custom !== null ) {
+                       return str_replace( array( '%f', '%r' ),
+                               array( $target, $replacement ),
+                               $custom );
+               }
+
+               // Return plain text in both branches rather than a Message object.
+               return wfMessage( 'replacetext_editsummary' )->
+                       rawParams( $target, $replacement )->text();
+       }
+
+       /**
+        * Print a table of namespace indexes and canonical names, sorted by
+        * index; the unnamed namespace is shown as "(main)".
+        */
+       protected function listNamespaces() {
+               echo "Index\tNamespace\n";
+               $canonical = MWNamespace::getCanonicalNamespaces();
+               ksort( $canonical );
+               foreach ( $canonical as $index => $name ) {
+                       $label = ( $name == "" ) ? "(main)" : $name;
+                       echo " $index\t$label\n";
+               }
+       }
+
+       /**
+        * Print a description of the file format expected by --replacements.
+        * The example rows use \\t escapes so that they really are tab-separated.
+        */
+       protected function showFileFormat() {
+echo <<<EOF
+
+The format of the replacements file is tab separated with three fields.
+Any line that does not have a tab is ignored and can be considered a comment.
+
+Fields are:
+
+ 1. String to search for.
+ 2. String to replace found text with.
+ 3. (optional) The presence of this field indicates that the previous two
+    are considered a regular expression.
+
+Example:
+
+This is a comment
+TARGET\tREPLACE
+regex(p*)\tCount the Ps; \\1\ttrue
+
+
+EOF;
+
+       }
+
+       /**
+        * Translate the comma separated --ns option into namespace indexes.
+        *
+        * Entries may be numeric indexes or canonical namespace names (matched
+        * case-insensitively, "main" standing for the unnamed namespace).
+        * Unknown entries are ignored; the script aborts when nothing usable is
+        * left. Without --ns only the main namespace is searched.
+        *
+        * @return array List of namespace indexes
+        */
+       protected function getNamespaces() {
+               $names = $this->getOption( "ns" );
+               if ( !$names ) {
+                       return array( NS_MAIN );
+               }
+
+               $canonical = MWNamespace::getCanonicalNamespaces();
+               $canonical[NS_MAIN] = "main";
+               // Lower-case the names once so that look-ups are case-insensitive.
+               $indexByName = array_change_key_case( array_flip( $canonical ), CASE_LOWER );
+
+               $namespaces = array();
+               foreach ( explode( ",", $names ) as $name ) {
+                       $name = trim( $name );
+                       if ( is_numeric( $name ) ) {
+                               if ( isset( $canonical[ intval( $name ) ] ) ) {
+                                       $namespaces[] = intval( $name );
+                               }
+                               continue;
+                       }
+                       // Canonical names use underscores where users tend to type spaces.
+                       $key = strtolower( str_replace( ' ', '_', $name ) );
+                       if ( isset( $indexByName[$key] ) ) {
+                               $namespaces[] = $indexByName[$key];
+                       }
+               }
+
+               if ( count( $namespaces ) === 0 ) {
+                       $this->error( "No valid namespaces found in '$names'.", true );
+               }
+               return array_values( array_unique( $namespaces ) );
+       }
+
+       /**
+        * Category restriction for the search; currently always null.
+        *
+        * @return null
+        */
+       protected function getCategory() {
+               return null;
+       }
+
+       /**
+        * Title-prefix restriction for the search; currently always null.
+        *
+        * @return null
+        */
+       protected function getPrefix() {
+               return null;
+       }
+
+       /**
+        * Wrap the value of the --regex option in a single-element list.
+        *
+        * @return array
+        */
+       protected function useRegex() {
+               $regexOption = $this->getOption( "regex" );
+               return array( $regexOption );
+       }
+
+       /**
+        * Turn the rows of a search result into Title objects.
+        *
+        * The list is rebuilt on every call so that it always reflects $res,
+        * not an earlier search. Rows that do not form a valid title are skipped.
+        *
+        * @param Traversable|array $res Search result with page_namespace and page_title per row
+        * @return array List of Title objects (also stored in $this->titles)
+        */
+       protected function getTitles( $res ) {
+               $this->titles = array();
+               // NOTE(review): relies on the result being re-iterable with foreach,
+               // since the same result is listed and then processed - confirm for
+               // the database backend in use.
+               foreach ( $res as $row ) {
+                       $title = Title::makeTitleSafe(
+                               $row->page_namespace,
+                               $row->page_title
+                       );
+                       if ( $title !== null ) {
+                               $this->titles[] = $title;
+                       }
+               }
+               return $this->titles;
+       }
+
+       /**
+        * Print one line per title found in the search result.
+        *
+        * @param mixed $res Search result, passed on to getTitles()
+        * @return bool True when at least one title was printed
+        */
+       protected function listTitles( $res ) {
+               $titles = $this->getTitles( $res );
+               foreach ( $titles as $title ) {
+                       echo "$title\n";
+               }
+               return count( $titles ) > 0;
+       }
+
+       /**
+        * Run a ReplaceTextJob immediately for every title in the search result.
+        *
+        * @param mixed $res Search result, passed on to getTitles()
+        * @param string $target Text to search for
+        * @param string $replacement Text to insert instead
+        * @param bool $useRegex Whether $target is a regular expression
+        */
+       protected function replaceTitles( $res, $target, $replacement, $useRegex ) {
+               // The job parameters are the same for every page of this replacement.
+               $param = array(
+                       'target_str'      => $target,
+                       'replacement_str' => $replacement,
+                       'use_regex'       => $useRegex,
+                       'user_id'         => $this->user->getId(),
+                       'edit_summary'    => $this->summaryMsg,
+               );
+               foreach ( $this->getTitles( $res ) as $title ) {
+                       echo "Replacing on $title... ";
+                       $job = new ReplaceTextJob( $title, $param, 0 );
+                       if ( $job->run() !== true ) {
+                               // Do not claim success for a page that could not be changed.
+                               echo "failed.\n";
+                               $this->error( "Trouble on the page '$title'." );
+                               continue;
+                       }
+                       echo "done.\n";
+               }
+       }
+
+       /**
+        * Ask a yes/no question on the console.
+        *
+        * Returns true without asking when prompts are to be skipped. An
+        * unanswerable prompt (end of input) counts as "no".
+        *
+        * @param string $question Text shown before "(Y/N)"
+        * @return bool True for yes, false for no
+        */
+       protected function getReply( $question ) {
+               if ( $this->shouldContinueByDefault() ) {
+                       return true;
+               }
+               $reply = "";
+               while ( $reply !== "y" && $reply !== "n" ) {
+                       $reply = $this->readconsole( "$question (Y/N) " );
+                       if ( $reply === false ) {
+                               // No more input available; without this the loop never ends.
+                               return false;
+                       }
+                       $reply = substr( strtolower( $reply ), 0, 1 );
+               }
+               return $reply === "y";
+       }
+
+       /**
+        * Handle the informational options and collect everything execute() needs.
+        *
+        * @return bool False when only information was printed (--listns or
+        *   --show-file-format), true when replacements should be carried out
+        */
+       protected function localSetup() {
+               if ( $this->getOption( "listns" ) ) {
+                       $this->listNamespaces();
+                       return false;
+               }
+
+               if ( $this->getOption( "show-file-format" ) ) {
+                       $this->showFileFormat();
+                       return false;
+               }
+
+               $this->user = $this->getUser();
+
+               // Fall back to the command-line arguments when no file was loaded.
+               $loadedFromFile = $this->getReplacements();
+               if ( !$loadedFromFile ) {
+                       $this->target = $this->getTarget();
+                       $this->replacement = $this->getReplacement();
+                       $this->useRegex = $this->useRegex();
+               }
+
+               $this->summaryMsg = $this->getSummary();
+               $this->namespaces = $this->getNamespaces();
+               $this->category = $this->getCategory();
+               $this->prefix = $this->getPrefix();
+
+               return true;
+       }
+
+       /**
+        * Entry point: search for every configured target and replace it on the
+        * pages found, asking for confirmation unless prompts are skipped.
+        * With --dry-run the matching pages are only listed.
+        */
+       public function execute() {
+               global $wgShowExceptionDetails;
+               $wgShowExceptionDetails = true;
+
+               if ( !$this->localSetup() ) {
+                       return;
+               }
+
+               $targets = (array)$this->target;
+               // A lone replacement without matches is fatal, as before; within a
+               // batch it is reported and the remaining replacements still run.
+               $onlyOne = count( $targets ) === 1;
+
+               foreach ( array_keys( $targets ) as $index ) {
+                       $target = $targets[$index];
+                       $replacement = $this->replacement[$index];
+                       $useRegex = $this->useRegex[$index];
+
+                       if ( $this->getOption( "debug" ) ) {
+                               echo "Replacing '$target' with '$replacement'";
+                               if ( $useRegex ) {
+                                       echo " as regular expression.";
+                               }
+                               echo "\n";
+                       }
+                       $res = ReplaceTextSearch::doSearchQuery( $target,
+                               $this->namespaces, $this->category, $this->prefix, $useRegex );
+
+                       if ( $res->numRows() === 0 ) {
+                               $this->error( "No targets found to replace.", $onlyOne );
+                               continue;
+                       }
+                       if ( $this->getOption( "dry-run" ) ) {
+                               // Only report what would be touched; never edit.
+                               $this->listTitles( $res );
+                               continue;
+                       }
+                       if ( !$this->shouldContinueByDefault() && $this->listTitles( $res ) ) {
+                               if ( !$this->getReply( "Replace instances on these pages?" ) ) {
+                                       return;
+                               }
+                       }
+                       $comment = "";
+                       if ( $this->getOption( "user", null ) === null ) {
+                               $comment = " (Use --user to override)";
+                       }
+                       if ( !$this->getReply( "Attribute changes to the user '{$this->user}'?$comment" ) ) {
+                               return;
+                       }
+                       $this->replaceTitles( $res, $target, $replacement, $useRegex );
+               }
+       }
+}
+
+$maintClass = "ReplaceText";
+require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/164896
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I8ccec61f570f33043d8a8d00c52b40acd9d6894a
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/ReplaceText
Gerrit-Branch: master
Gerrit-Owner: MarkAHershberger <[email protected]>
Gerrit-Reviewer: MarkAHershberger <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: Yaron Koren <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to