Yaron Koren has submitted this change and it was merged.
Change subject: Create CLI utility for ReplaceText jobs
......................................................................
Create CLI utility for ReplaceText jobs
This creates a new class (ReplaceTextSearch) to move the logic from
the special page so that it is available for CLI access. Also creates
replaceAll.php, which can be used for search-replace tasks that need to be
automated.
Change-Id: I8ccec61f570f33043d8a8d00c52b40acd9d6894a
---
M ReplaceText.php
A ReplaceTextSearch.php
M SpecialReplaceText.php
A replaceAll.php
4 files changed, 438 insertions(+), 58 deletions(-)
Approvals:
Yaron Koren: Checked; Looks good to me, approved
diff --git a/ReplaceText.php b/ReplaceText.php
index e80ba2d..0ec617e 100644
--- a/ReplaceText.php
+++ b/ReplaceText.php
@@ -47,6 +47,7 @@
$wgSpecialPageGroups['ReplaceText'] = 'wiki';
$wgAutoloadClasses['ReplaceText'] = $rtgIP . 'SpecialReplaceText.php';
$wgAutoloadClasses['ReplaceTextJob'] = $rtgIP . 'ReplaceTextJob.php';
+$wgAutoloadClasses['ReplaceTextSearch'] = $rtgIP . 'ReplaceTextSearch.php';
/**
* This function should really go into a "ReplaceText_body.php" file.
diff --git a/ReplaceTextSearch.php b/ReplaceTextSearch.php
new file mode 100644
index 0000000..73a9f62
--- /dev/null
+++ b/ReplaceTextSearch.php
@@ -0,0 +1,59 @@
+<?php
+
+/**
+ * Search helper that builds and runs the database query finding pages
+ * whose latest revision text matches a search string or regular expression.
+ * Callable statically so that code outside the special page can use it.
+ */
+class ReplaceTextSearch {
+	/**
+	 * Select the pages whose current text matches $search.
+	 *
+	 * @param string $search Literal text (matched with LIKE), or a regular
+	 *   expression when $use_regex is true
+	 * @param mixed $namespaces Value used for the page_namespace condition
+	 * @param string|null $category When non-empty, restrict to this category
+	 * @param string|null $prefix When non-empty, restrict to titles starting
+	 *   with this prefix
+	 * @param bool $use_regex Treat $search as a regular expression
+	 * @return mixed The result of $dbr->select(), with the fields page_id,
+	 *   page_namespace, page_title and old_text, ordered by namespace and title
+	 */
+	public static function doSearchQuery( $search, $namespaces, $category, $prefix, $use_regex = false ) {
+		$dbr = wfGetDB( DB_SLAVE );
+		$tables = array( 'page', 'revision', 'text' );
+		$vars = array( 'page_id', 'page_namespace', 'page_title', 'old_text' );
+		if ( $use_regex ) {
+			$comparisonCond = self::regexCond( $dbr, 'old_text', $search );
+		} else {
+			$any = $dbr->anyString();
+			$comparisonCond = 'old_text ' . $dbr->buildLike( $any, $search, $any );
+		}
+		$conds = array(
+			$comparisonCond,
+			'page_namespace' => $namespaces,
+			// Join page -> latest revision -> text row.
+			'rev_id = page_latest',
+			'rev_text_id = old_id'
+		);
+
+		self::categoryCondition( $category, $tables, $conds );
+		self::prefixCondition( $prefix, $conds );
+		$sort = array( 'ORDER BY' => 'page_namespace, page_title' );
+
+		return $dbr->select( $tables, $vars, $conds, __METHOD__, $sort );
+	}
+
+	/**
+	 * Add a category restriction to the query when $category is non-empty.
+	 *
+	 * The category is normalised through Title when it parses; otherwise the
+	 * raw string is used, mirroring prefixCondition(), instead of calling a
+	 * method on null.
+	 *
+	 * @param string|null $category
+	 * @param array &$tables Table list; 'categorylinks' is appended
+	 * @param array &$conds Condition list; the join and cl_to conditions are added
+	 */
+	static protected function categoryCondition( $category, &$tables, &$conds ) {
+		if ( strval( $category ) === '' ) {
+			return;
+		}
+
+		$title = Title::newFromText( $category );
+		if ( !is_null( $title ) ) {
+			$category = $title->getDbKey();
+		}
+		$tables[] = 'categorylinks';
+		$conds[] = 'page_id = cl_from';
+		$conds['cl_to'] = $category;
+	}
+
+	/**
+	 * Add a "page_title LIKE prefix%" restriction when $prefix is non-empty.
+	 *
+	 * @param string|null $prefix
+	 * @param array &$conds Condition list; the LIKE condition is appended
+	 */
+	static protected function prefixCondition( $prefix, &$conds ) {
+		if ( strval( $prefix ) === '' ) {
+			return;
+		}
+
+		$dbr = wfGetDB( DB_SLAVE );
+		$title = Title::newFromText( $prefix );
+		if ( !is_null( $title ) ) {
+			$prefix = $title->getDbKey();
+		}
+		$any = $dbr->anyString();
+		$conds[] = 'page_title ' . $dbr->buildLike( $prefix, $any );
+	}
+
+	/**
+	 * Build a regular-expression comparison for the given column, using
+	 * '~' on DatabasePostgres and 'REGEXP' on every other backend.
+	 *
+	 * @param object $dbr Database connection (used for quoting)
+	 * @param string $column
+	 * @param string $regex
+	 * @return string SQL condition fragment
+	 */
+	static private function regexCond( $dbr, $column, $regex ) {
+		if ( $dbr instanceof DatabasePostgres ) {
+			$op = '~';
+		} else {
+			$op = 'REGEXP';
+		}
+		return "$column $op " . $dbr->addQuotes( $regex );
+	}
+}
\ No newline at end of file
diff --git a/SpecialReplaceText.php b/SpecialReplaceText.php
index 9499712..8b70dcf 100644
--- a/SpecialReplaceText.php
+++ b/SpecialReplaceText.php
@@ -128,7 +128,7 @@
// if user is replacing text within pages...
if ( $this->edit_pages ) {
- $res = $this->doSearchQuery(
+ $res = ReplaceTextSearch::doSearchQuery(
$this->target,
$this->selected_namespaces,
$this->category,
@@ -209,7 +209,7 @@
if ( $this->replacement === '' ) {
$warning_msg =
$this->msg('replacetext_blankwarning')->text();
} elseif ( count( $titles_for_edit ) > 0 ) {
- $res = $this->doSearchQuery(
$this->replacement, $this->selected_namespaces, $this->category, $this->prefix,
$this->use_regex );
+ $res =
ReplaceTextSearch::doSearchQuery( $this->replacement,
$this->selected_namespaces, $this->category, $this->prefix, $this->use_regex );
$count = $res->numRows();
if ( $count > 0 ) {
$warning_msg = $this->msg(
'replacetext_warning' )->numParams( $count )
@@ -594,61 +594,5 @@
$sort = array( 'ORDER BY' => 'page_namespace, page_title' );
return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort
);
- }
-
- function doSearchQuery( $search, $namespaces, $category, $prefix,
$use_regex = false ) {
- $dbr = wfGetDB( DB_SLAVE );
- $tables = array( 'page', 'revision', 'text' );
- $vars = array( 'page_id', 'page_namespace', 'page_title',
'old_text' );
- if ( $use_regex ) {
- $comparisonCond = $this->regexCond( $dbr, 'old_text',
$search );
- } else {
- $any = $dbr->anyString();
- $comparisonCond = 'old_text ' . $dbr->buildLike( $any,
$search, $any );
- }
- $conds = array(
- $comparisonCond,
- 'page_namespace' => $namespaces,
- 'rev_id = page_latest',
- 'rev_text_id = old_id'
- );
-
- $this->categoryCondition( $category, $tables, $conds );
- $this->prefixCondition( $prefix, $conds );
- $sort = array( 'ORDER BY' => 'page_namespace, page_title' );
-
- return $dbr->select( $tables, $vars, $conds, __METHOD__ , $sort
);
- }
-
- protected function categoryCondition( $category, &$tables, &$conds ) {
- if ( strval( $category ) !== '' ) {
- $category = Title::newFromText( $category )->getDbKey();
- $tables[] = 'categorylinks';
- $conds[] = 'page_id = cl_from';
- $conds['cl_to'] = $category;
- }
- }
-
- protected function prefixCondition( $prefix, &$conds ) {
- if ( strval( $prefix ) === '' ) {
- return;
- }
-
- $dbr = wfGetDB( DB_SLAVE );
- $title = Title::newFromText( $prefix );
- if ( !is_null( $title ) ) {
- $prefix = $title->getDbKey();
- }
- $any = $dbr->anyString();
- $conds[] = 'page_title ' . $dbr->buildLike( $prefix, $any );
- }
-
- private function regexCond( $dbr, $column, $regex ) {
- if ( $dbr instanceof DatabasePostgres ) {
- $op = '~';
- } else {
- $op = 'REGEXP';
- }
- return "$column $op " . $dbr->addQuotes( $regex );
}
}
diff --git a/replaceAll.php b/replaceAll.php
new file mode 100755
index 0000000..e869f7f
--- /dev/null
+++ b/replaceAll.php
@@ -0,0 +1,376 @@
+#!/usr/bin/php
+<?php
+/**
+ * Insert jobs into the jobqueue to replace text bits.
+ * Or execute immediately... your choice.
+ *
+ * Copyright © 2014 Mark A. Hershberger <[email protected]>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * PHP version 5
+ *
+ * @file
+ * @category Maintenance
+ * @package ReplaceText
+ * @author Mark A. Hershberger <[email protected]>
+ * @license GPLv3 http://www.gnu.org/copyleft/gpl.html
+ * @link https://www.mediawiki.org/wiki/Extension:Replace_Text
+ *
+ */
+
+// Locate the MediaWiki installation. MW_INSTALL_PATH wins when set; otherwise
+// the current working directory is tried first (as before), and if it does not
+// contain maintenance/Maintenance.php we fall back to two levels above this
+// script, i.e. the root when the script lives in extensions/ReplaceText/.
+$IP = getenv( 'MW_INSTALL_PATH' );
+if ( $IP === false ) {
+	$IP = realpath( '.' ) ?: dirname( __DIR__ );
+	if ( !is_file( "$IP/maintenance/Maintenance.php" ) ) {
+		$IP = dirname( dirname( __DIR__ ) );
+	}
+}
+require_once "$IP/maintenance/Maintenance.php";
+
+/**
+ * Maintenance script that finds and replaces text in wiki pages from the command line.
+ *
+ * @ingroup Maintenance
+ * @SuppressWarnings(StaticAccess)
+ * @SuppressWarnings(LongVariable)
+ */
+class ReplaceText extends Maintenance {
+ protected $user;
+ protected $target;
+ protected $replacement;
+ protected $summaryMsg;
+ protected $namespaces;
+ protected $category;
+ protected $prefix;
+ protected $useRegex;
+ protected $titles;
+ protected $defaultContinue;
+
+	/**
+	 * Register the script description, its two positional arguments and
+	 * its command-line options.
+	 *
+	 * The "ns" option is registered as taking a value so that both
+	 * "--ns=Talk" and "--ns Talk" are accepted.
+	 */
+	public function __construct() {
+		parent::__construct();
+		$this->mDescription = "CLI utility to replace text wherever it is " .
+			"found in the wiki.";
+
+		$this->addArg( "target", "Target text to find.", false );
+		$this->addArg( "replace", "Replacement text.", false );
+
+		$this->addOption( "dry-run", "Only find the texts, don't replace.",
+			false, false, 'n' );
+		$this->addOption( "regex", "This is a regex (false).",
+			false, false, 'r' );
+		$this->addOption( "user", "The user to attribute this to (uid 1).",
+			false, true, 'u' );
+		$this->addOption( "yes", "Skip all prompts with an assumed 'yes'.",
+			false, false, 'y' );
+		$this->addOption( "summary", "Alternate edit summary. (%r is where to " .
+			"place the replacement text, %f the text to look for.)",
+			false, true, 's' );
+		$this->addOption( "ns", "Comma separated namespaces to search in. " .
+			"(Main)", false, true );
+		$this->addOption( "replacements", "File containing the list of replacements to " .
+			"be made. Fields in the file are tab-separated. See --show-file-format " .
+			"for more information.",
+			false, true, "f" );
+		$this->addOption( "show-file-format", "Show a description of the file format to " .
+			"use with --replacements.", false, false );
+		$this->addOption( "debug", "Display replacements being made.", false, false );
+
+		$this->addOption( "listns", "List out the namespaces on this wiki.",
+			false, false );
+	}
+
+	/**
+	 * Resolve the --user option (default: user id 1) to a User object.
+	 *
+	 * Numeric values are treated as user ids, anything else as a user name.
+	 * The result is checked with instanceof so that a non-object result of
+	 * the name lookup is reported as an error rather than handed to
+	 * get_class().
+	 *
+	 * @return User
+	 */
+	protected function getUser() {
+		$userReplacing = $this->getOption( "user", 1 );
+
+		$user = is_numeric( $userReplacing ) ?
+			User::newFromId( $userReplacing ) :
+			User::newFromName( $userReplacing );
+
+		if ( !( $user instanceof User ) ) {
+			// Second argument true: presumably the Maintenance "die" flag.
+			$this->error(
+				"Couldn't translate '$userReplacing' to a user.", true
+			);
+		}
+
+		return $user;
+	}
+
+	/**
+	 * Read the search target from the first positional argument.
+	 *
+	 * Only a missing or empty argument is rejected; a target such as "0"
+	 * is falsy in PHP but is still valid text to search for.
+	 *
+	 * @return string[] Single-element list holding the target
+	 */
+	protected function getTarget() {
+		$ret = $this->getArg( 0 );
+		if ( $ret === null || $ret === false || $ret === '' ) {
+			$this->error( "You have to specify a target.", true );
+		}
+		return array( $ret );
+	}
+
+	/**
+	 * Read the replacement text from the second positional argument.
+	 *
+	 * Only a missing or empty argument is rejected; a replacement such as
+	 * "0" is falsy in PHP but is still valid replacement text.
+	 *
+	 * @return string[] Single-element list holding the replacement
+	 */
+	protected function getReplacement() {
+		$ret = $this->getArg( 1 );
+		if ( $ret === null || $ret === false || $ret === '' ) {
+			$this->error( "You have to specify replacement text.", true );
+		}
+		return array( $ret );
+	}
+
+	/**
+	 * Load target/replacement pairs from the file named by --replacements.
+	 *
+	 * Each line is split on tabs; lines without a tab are skipped. Field 0
+	 * is the target, field 1 the replacement, and the mere presence of a
+	 * third field marks the pair as a regular expression. Line endings are
+	 * stripped first so they do not end up in the last field. Using a file
+	 * also turns on "continue without prompting".
+	 *
+	 * @return bool False when no file was given, true once the file was read
+	 * @throws MWException When the file cannot be read or opened
+	 */
+	protected function getReplacements() {
+		$file = $this->getOption( "replacements" );
+		if ( !$file ) {
+			return false;
+		}
+
+		if ( !is_readable( $file ) ) {
+			throw new MWException( "File does not exist or is not readable: $file\n" );
+		}
+
+		$handle = fopen( $file, "r" );
+		if ( $handle === false ) {
+			throw new MWException( "Trouble opening file: $file\n" );
+		}
+
+		$this->defaultContinue = true;
+		// Start from empty lists so a file without any pair yields no work
+		// instead of leaving the properties unset.
+		$this->target = array();
+		$this->replacement = array();
+		$this->useRegex = array();
+		while ( ( $line = fgets( $handle ) ) !== false ) {
+			$field = explode( "\t", rtrim( $line, "\r\n" ) );
+			if ( !isset( $field[1] ) ) {
+				continue;
+			}
+
+			$this->target[] = $field[0];
+			$this->replacement[] = $field[1];
+			$this->useRegex[] = isset( $field[2] );
+		}
+		fclose( $handle );
+		return true;
+	}
+
+	/**
+	 * Tell whether prompts should be answered "yes" automatically.
+	 *
+	 * The answer is derived from the --yes option the first time it is
+	 * needed and then kept in $this->defaultContinue.
+	 *
+	 * @return bool
+	 */
+	protected function shouldContinueByDefault() {
+		if ( is_bool( $this->defaultContinue ) ) {
+			return $this->defaultContinue;
+		}
+
+		$this->defaultContinue = (bool)$this->getOption( "yes" );
+		return $this->defaultContinue;
+	}
+
+	/**
+	 * Build the edit summary.
+	 *
+	 * $this->target and $this->replacement hold lists of strings, so they
+	 * are joined with ", " before use; passing the arrays straight to
+	 * str_replace() would put the word "Array" into a custom summary.
+	 *
+	 * @return Message|string The 'replacetext_editsummary' message object,
+	 *   or the --summary text with %f and %r substituted when that option
+	 *   was given
+	 */
+	protected function getSummary() {
+		$target = is_array( $this->target ) ?
+			implode( ', ', $this->target ) :
+			$this->target;
+		$replacement = is_array( $this->replacement ) ?
+			implode( ', ', $this->replacement ) :
+			$this->replacement;
+
+		$custom = $this->getOption( "summary" );
+		if ( $custom !== null ) {
+			return str_replace( array( '%f', '%r' ),
+				array( $target, $replacement ),
+				$custom );
+		}
+
+		return wfMessage( 'replacetext_editsummary' )->
+			rawParams( $target )->rawParams( $replacement );
+	}
+
+	/**
+	 * Print a two-column table (index, name) of the canonical namespaces,
+	 * sorted by index. A namespace with an empty name is shown as "(main)".
+	 */
+	protected function listNamespaces() {
+		echo "Index\tNamespace\n";
+		$canonical = MWNamespace::getCanonicalNamespaces();
+		ksort( $canonical );
+		foreach ( $canonical as $index => $name ) {
+			$label = ( $name == "" ) ? "(main)" : $name;
+			echo " $index\t$label\n";
+		}
+	}
+
+ /**
+ * Print a description of the tab-separated file format accepted by the
+ * --replacements option, followed by a short example.
+ */
+ protected function showFileFormat() {
+echo <<<EOF
+
+The format of the replacements file is tab separated with three fields.
+Any line that does not have a tab is ignored and can be considered a comment.
+
+Fields are:
+
+ 1. String to search for.
+ 2. String to replace found text with.
+ 3. (optional) The presence of this field indicates that the previous two
+ are considered a regular expression.
+
+Example:
+
+This is a comment
+TARGET REPLACE
+regex(p*) Count the Ps; \\1 true
+
+
+EOF;
+
+ }
+
+	/**
+	 * Translate the comma-separated --ns option into namespace indexes.
+	 *
+	 * Each entry may be a numeric index of an existing namespace or a
+	 * canonical namespace name; names are compared case-insensitively and
+	 * the main namespace is addressed as "main". Entries that match
+	 * nothing are dropped. Without --ns the main namespace is used.
+	 *
+	 * @return int[]
+	 */
+	protected function getNamespaces() {
+		$names = $this->getOption( "ns" );
+		if ( !$names ) {
+			return array( NS_MAIN );
+		}
+
+		$canonical = MWNamespace::getCanonicalNamespaces();
+		$canonical[0] = "main";
+		// Lower-cased name => index, so lookups can ignore case.
+		$indexByName = array_flip( array_map( 'strtolower', $canonical ) );
+
+		$namespaces = array();
+		foreach ( explode( ",", $names ) as $name ) {
+			$name = trim( $name );
+			if ( is_numeric( $name ) && isset( $canonical[ $name ] ) ) {
+				$namespaces[] = intval( $name );
+				continue;
+			}
+			$key = strtolower( $name );
+			if ( isset( $indexByName[ $key ] ) ) {
+				$namespaces[] = $indexByName[ $key ];
+			}
+		}
+		return $namespaces;
+	}
+
+	/**
+	 * Category restriction for the search; always null here.
+	 *
+	 * @return null
+	 */
+	protected function getCategory() {
+		return null;
+	}
+
+	/**
+	 * Title-prefix restriction for the search; always null here.
+	 *
+	 * @return null
+	 */
+	protected function getPrefix() {
+		return null;
+	}
+
+	/**
+	 * Wrap the value of the --regex option in a single-element list.
+	 *
+	 * @return array
+	 */
+	protected function useRegex() {
+		$regexFlag = $this->getOption( "regex" );
+		return array( $regexFlag );
+	}
+
+	/**
+	 * Turn the rows of a search result into Title objects.
+	 *
+	 * The list is kept in $this->titles and reused while it is non-empty,
+	 * so the result set is only walked once. The cache is not tied to
+	 * $res: $this->titles must be emptied before a different result is
+	 * passed in. An unset property is treated as empty without calling
+	 * count() on null.
+	 *
+	 * @param mixed $res Search result providing fetchObject()
+	 * @return array Titles built with Title::makeTitleSafe()
+	 */
+	protected function getTitles( $res ) {
+		if ( !$this->titles ) {
+			$this->titles = array();
+			while ( $row = $res->fetchObject() ) {
+				$this->titles[] = Title::makeTitleSafe(
+					$row->page_namespace,
+					$row->page_title
+				);
+			}
+		}
+		return $this->titles;
+	}
+
+	/**
+	 * Print one matching title per line.
+	 *
+	 * @param mixed $res Search result handed to getTitles()
+	 * @return bool True when at least one title was printed
+	 */
+	protected function listTitles( $res ) {
+		$found = $this->getTitles( $res );
+		foreach ( $found as $title ) {
+			echo "$title\n";
+		}
+		return count( $found ) > 0;
+	}
+
+	/**
+	 * Run a ReplaceTextJob immediately for every title of the result.
+	 *
+	 * @param mixed $res Search result handed to getTitles()
+	 * @param string $target Text or pattern to look for
+	 * @param string $replacement Text to put in its place
+	 * @param bool $useRegex Whether $target is a regular expression
+	 */
+	protected function replaceTitles( $res, $target, $replacement, $useRegex ) {
+		$pages = $this->getTitles( $res );
+		foreach ( $pages as $page ) {
+			$jobParams = array();
+			$jobParams['target_str'] = $target;
+			$jobParams['replacement_str'] = $replacement;
+			$jobParams['use_regex'] = $useRegex;
+			$jobParams['user_id'] = $this->user->getId();
+			$jobParams['edit_summary'] = $this->summaryMsg;
+
+			echo "Replacing on $page... ";
+			$replaceJob = new ReplaceTextJob( $page, $jobParams, 0 );
+			$succeeded = ( $replaceJob->run() === true );
+			if ( !$succeeded ) {
+				$this->error( "Trouble on the page '$page'." );
+			}
+			echo "done.\n";
+		}
+	}
+
+	/**
+	 * Ask a yes/no question on the console.
+	 *
+	 * Returns true without asking when prompts are to be skipped. Otherwise
+	 * the question is repeated until the answer starts with "y" or "n"
+	 * (case-insensitive). If readconsole() returns false (presumably end of
+	 * input) the answer is taken as "no" instead of asking forever.
+	 *
+	 * @param string $question
+	 * @return bool True for yes
+	 */
+	protected function getReply( $question ) {
+		if ( $this->shouldContinueByDefault() ) {
+			return true;
+		}
+		$reply = "";
+		while ( $reply !== "y" && $reply !== "n" ) {
+			$reply = $this->readconsole( "$question (Y/N) " );
+			if ( $reply === false ) {
+				return false;
+			}
+			$reply = substr( strtolower( $reply ), 0, 1 );
+		}
+		return $reply === "y";
+	}
+
+	/**
+	 * Prepare the run from the command-line options.
+	 *
+	 * --listns and --show-file-format only print information and stop.
+	 * Otherwise the acting user, the target/replacement pairs (from the
+	 * replacements file, or else from the positional arguments), the edit
+	 * summary, the namespaces, the category and the prefix are stored on
+	 * the object.
+	 *
+	 * @return bool False when only information was printed, true when the
+	 *   replacement run should go ahead
+	 */
+	protected function localSetup() {
+		// Option name => method that prints the requested information.
+		$infoOnly = array(
+			"listns" => 'listNamespaces',
+			"show-file-format" => 'showFileFormat',
+		);
+		foreach ( $infoOnly as $option => $printer ) {
+			if ( $this->getOption( $option ) ) {
+				$this->$printer();
+				return false;
+			}
+		}
+
+		$this->user = $this->getUser();
+
+		$loadedFromFile = $this->getReplacements();
+		if ( !$loadedFromFile ) {
+			$this->target = $this->getTarget();
+			$this->replacement = $this->getReplacement();
+			$this->useRegex = $this->useRegex();
+		}
+
+		$this->summaryMsg = $this->getSummary();
+		$this->namespaces = $this->getNamespaces();
+		$this->category = $this->getCategory();
+		$this->prefix = $this->getPrefix();
+
+		return true;
+	}
+
+	/**
+	 * Entry point: search for every target and replace it on the pages found.
+	 *
+	 * For each target/replacement pair the matching pages are looked up.
+	 * With --dry-run they are only listed. Otherwise the user is asked to
+	 * confirm (unless prompts are skipped) and the replacement is carried
+	 * out on each page. Declining a prompt ends the whole run.
+	 */
+	public function execute() {
+		global $wgShowExceptionDetails;
+		$wgShowExceptionDetails = true;
+
+		if ( !$this->localSetup() ) {
+			return;
+		}
+
+		foreach ( array_keys( $this->target ) as $index ) {
+			$target = $this->target[$index];
+			$replacement = $this->replacement[$index];
+			$useRegex = $this->useRegex[$index];
+
+			// getTitles() caches its list in $this->titles; empty it so
+			// this pair does not reuse the pages found for the previous one.
+			$this->titles = array();
+
+			if ( $this->getOption( "debug" ) ) {
+				echo "Replacing '$target' with '$replacement'";
+				if ( $useRegex ) {
+					echo " as regular expression.";
+				}
+				echo "\n";
+			}
+			$res = ReplaceTextSearch::doSearchQuery( $target,
+				$this->namespaces, $this->category, $this->prefix, $useRegex );
+
+			if ( $res->numRows() === 0 ) {
+				$this->error( "No targets found to replace.", true );
+			}
+
+			// --dry-run: show what would be touched, change nothing.
+			if ( $this->getOption( "dry-run" ) ) {
+				$this->listTitles( $res );
+				continue;
+			}
+
+			if ( !$this->shouldContinueByDefault() && $this->listTitles( $res ) ) {
+				if ( !$this->getReply( "Replace instances on these pages?" ) ) {
+					return;
+				}
+			}
+			$comment = "";
+			if ( $this->getOption( "user", null ) === null ) {
+				$comment = " (Use --user to override)";
+			}
+			if ( !$this->getReply( "Attribute changes to the user '{$this->user}'?$comment" ) ) {
+				return;
+			}
+			if ( $res->numRows() > 0 ) {
+				$this->replaceTitles( $res, $target, $replacement, $useRegex );
+			}
+		}
+	}
+}
+
+// Name the maintenance class defined above and include the file named by
+// RUN_MAINTENANCE_IF_MAIN; presumably the runner loaded from Maintenance.php
+// uses $maintClass to instantiate and run the script -- TODO confirm.
+$maintClass = "ReplaceText";
+require_once RUN_MAINTENANCE_IF_MAIN;
--
To view, visit https://gerrit.wikimedia.org/r/164896
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8ccec61f570f33043d8a8d00c52b40acd9d6894a
Gerrit-PatchSet: 5
Gerrit-Project: mediawiki/extensions/ReplaceText
Gerrit-Branch: master
Gerrit-Owner: MarkAHershberger <[email protected]>
Gerrit-Reviewer: MarkAHershberger <[email protected]>
Gerrit-Reviewer: Nikerabbit <[email protected]>
Gerrit-Reviewer: Yaron Koren <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits