jenkins-bot has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/394816 )
Change subject: Reattribute edits to imported users.
......................................................................
Reattribute edits to imported users.
Edits and log entries which previously had user id 0 but the username of
the imported user would stay unattributed, which meant they wouldn't
show up in Special:Contributions. Now we (optionally) reattribute those
edits as well. A maintenance script is also provided which can reattribute
all edits; it does not require this extension to be installed to run.
Additionally, a new config option dictates whether or not to import the
watchlist.
Change-Id: Ifd26850a9fdf36deaf3c4408375c1dbd9f80eb86
---
M ExternalWikiPrimaryAuthenticationProvider.php
M PopulateImportedWatchlistJob.php
A ReattributeImportedEditsJob.php
M extension.json
A maintenance/reattributeImportedEdits.php
5 files changed, 228 insertions(+), 27 deletions(-)
Approvals:
Skizzerz: Looks good to me, approved
jenkins-bot: Verified
diff --git a/ExternalWikiPrimaryAuthenticationProvider.php
b/ExternalWikiPrimaryAuthenticationProvider.php
index 45761f5..4de21dd 100644
--- a/ExternalWikiPrimaryAuthenticationProvider.php
+++ b/ExternalWikiPrimaryAuthenticationProvider.php
@@ -160,35 +160,81 @@
];
$watchlist = [];
-
- while ( true ) {
- $resp = $this->apiRequest( 'GET', $wrquery, [],
__METHOD__ );
- $watchlist = array_merge( $watchlist,
$resp->watchlistraw );
-
- if ( !isset( $resp->{'query-continue'} ) ) {
- break;
- }
-
- $wrquery['wrcontinue'] =
$resp->{'query-continue'}->watchlistraw->wrcontinue;
- }
-
- // enqueue jobs to actually add the watchlist pages to the
user, since there might be a lot of them
$pagesPerJob = (int)$this->config->get( 'UpdateRowsPerJob' );
- if ( $pagesPerJob <= 0 ) {
- $this->logger->warning( '$wgUpdateRowsPerJob is set to
0 or a negative value; importing watchlist in batches of 300 instead.' );
- $pagesPerJob = 300;
+ if ( $pagesPerJob < 100 ) {
+ $this->logger->warning( '$wgUpdateRowsPerJob is set to
less than 100; performing jobs in batches of 100 instead.' );
+ $pagesPerJob = 100;
}
+ $dbw = wfGetDB( DB_MASTER );
+ $dbr = wfGetDB( DB_REPLICA );
$jobs = [];
$title = $user->getUserPage(); // not used by us, but Job
constructor needs a valid Title
- while ( $watchlist ) {
- // array_splice reduces the size of $watchlist and
returns the removed elements.
- // This avoids memory bloat so that we only keep the
watchlist resident in memory one time.
- $slice = array_splice( $watchlist, 0, $pagesPerJob );
- $jobs[] = new PopulateImportedWatchlistJob( $title, [
'username' => $user->getName(), 'pages' => $slice ] );
+
+ // enqueue jobs to actually add watchlist items and to
reattribute already-existing edits (if enabled)
+ if ( $this->config->get( 'MediaWikiAuthImportWatchlist' ) ) {
+ while ( true ) {
+ $resp = $this->apiRequest( 'GET', $wrquery, [],
__METHOD__ );
+ $watchlist = array_merge( $watchlist,
$resp->watchlistraw );
+
+ if ( !isset( $resp->{'query-continue'} ) ) {
+ break;
+ }
+
+ $wrquery['wrcontinue'] =
$resp->{'query-continue'}->watchlistraw->wrcontinue;
+ }
+
+ while ( $watchlist ) {
+ // array_splice reduces the size of $watchlist
and returns the removed elements.
+ // This avoids memory bloat so that we only
keep the watchlist resident in memory one time.
+ $slice = array_splice( $watchlist, 0,
$pagesPerJob );
+ $jobs[] = new PopulateImportedWatchlistJob(
$title, [ 'username' => $user->getName(), 'pages' => $slice ] );
+ }
}
- \JobQueueGroup::singleton()->push( $jobs );
+ if ( $this->config->get( 'MediaWikiAuthReattributeEdits' ) ) {
+ foreach ( ReattributeImportedEdits::getTableMetadata()
as $table => $metadata ) {
+ $idKey = $metadata[0];
+
+ foreach ( $metadata[1] as $nameKey => $fields )
{
+ $idEnd = true; // so next loop doesn't
terminate immediately
+
+ for ( $offset = 0; $idEnd !== false;
$offset += $pagesPerJob ) {
+ // this is being thrown in the
job queue anyway, so up-to-date data isn't required
+ // any newly-imported revs/logs
will see our new user and attribute properly anyway
+ $idStart = $dbr->selectField(
+ $table,
+ $idKey,
+ '', // no WHERE clause
+ __METHOD__ . ':idStart',
+ [ 'ORDER BY' => $idKey,
'OFFSET' => $offset ]
+ );
+
+ $idEnd = $dbr->selectField(
+ $table,
+ $idKey,
+ '', // no WHERE clause
+ __METHOD__ . ':idEnd',
+ [ 'ORDER BY' => $idKey,
'OFFSET' => $offset + $pagesPerJob - 1 ]
+ );
+
+ $jobs[] = new
ReattributeImportedEditsJob( $title, [
+ 'username' =>
$user->getName(),
+ 'id_start' => $idStart,
+ 'id_end' => $idEnd,
+ 'table' => $table,
+ 'idkey' => $idKey,
+ 'namekey' => $nameKey,
+ 'fields' => $fields
+ ] );
+ }
+ }
+ }
+ }
+
+ if ( $jobs !== [] ) {
+ \JobQueueGroup::singleton()->push( $jobs );
+ }
// groupmemberships contains groups and expiries, but is only
present in recent versions of MW. Fall back to groups if it doesn't exist.
$validGroups = array_diff( array_keys( $this->config->get(
'GroupPermissions' ) ), $this->config->get( 'ImplicitGroups' ) );
@@ -246,7 +292,6 @@
}
// editcount and registrationdate cannot be set via methods on
User
- $dbw = wfGetDB( DB_MASTER );
$dbw->update(
'user',
[
diff --git a/PopulateImportedWatchlistJob.php b/PopulateImportedWatchlistJob.php
index 05a8984..37d7c62 100644
--- a/PopulateImportedWatchlistJob.php
+++ b/PopulateImportedWatchlistJob.php
@@ -39,5 +39,7 @@
// we may wish to import changed as well (as
wl_notificationtimestamp in the db). Note that $page->changed may not exists,
// need to test with if ( isset( $page->changed ) )
before doing anything with it.
}
+
+ return true;
}
}
diff --git a/ReattributeImportedEditsJob.php b/ReattributeImportedEditsJob.php
new file mode 100644
index 0000000..32813e0
--- /dev/null
+++ b/ReattributeImportedEditsJob.php
@@ -0,0 +1,52 @@
+<?php
+
+namespace MediaWikiAuth;
+
+use User;
+
+class ReattributeImportedEditsJob extends \Job {
+ /**
+ * Construct a new edit reattribution job.
+ *
+ * @param $title Title unused
+ * @param $params Array of the format [
+ * 'username' => string username of the user whose edits we are
reattributing
+ * 'id_start' => mixed id of the revision/log we're starting at to
reattribute
+ * 'id_end' => mixed id of the revision/log we're ending at
(inclusive)
+ * 'table' => string table name to operate on (without prefix)
+ * 'idkey' => string field containing table id
+ * 'namekey' => string field containing username to look up
+ * 'fields' => array of string fields containing user ids to modify
+ * ]
+ */
+ public function __construct( $title, $params ) {
+ parent::__construct( 'reattributeImportedEdits', $title,
$params );
+ }
+
+ public function run() {
+ $user = User::newFromName( $this->params['username'] );
+ if ( $user === null || $user->getId() === 0 ) {
+ throw new \BadMethodCallException( "Attempting to
reattribute edits for nonexistent user {$this->params['username']}." );
+ }
+
+ $updateFields = array_fill_keys( $this->params['fields'],
$user->getId() );
+
+ $dbw = wfGetDB( DB_MASTER );
+ $conds = [ $this->params['namekey'] => $user->getName() ];
+ $id1 = $dbw->addQuotes( $this->params['id_start'] );
+ $id2 = $dbw->addQuotes( $this->params['id_end'] );
+
+ if ( $this->params['id_start'] === false &&
$this->params['id_end'] !== false ) {
+ $conds[] = "{$this->params['idkey']} <= {$id2}";
+ } elseif ( $this->params['id_start'] !== false &&
$this->params['id_end'] === false ) {
+ $conds[] = "{$this->params['idkey']} >= {$id1}";
+ } elseif ( $this->params['id_start'] !== false &&
$this->params['id_end'] !== false ) {
+ $conds[] = "{$this->params['idkey']} BETWEEN {$id1} AND
{$id2}";
+ }
+
+ $dbw->update( $this->params['table'], $updateFields, $conds,
__METHOD__ );
+
+ return true;
+ }
+
+}
diff --git a/extension.json b/extension.json
index 73591cc..d62130b 100644
--- a/extension.json
+++ b/extension.json
@@ -1,6 +1,6 @@
{
"name": "MediaWikiAuth",
- "version": "0.10.0",
+ "version": "1.0.0",
"author": [
"Laurence Parry",
"Jack Phoenix",
@@ -14,7 +14,9 @@
"config": {
"MediaWikiAuthAllowPasswordChange": false,
"MediaWikiAuthApiUrl": "",
- "MediaWikiAuthImportGroups": true
+ "MediaWikiAuthImportGroups": true,
+ "MediaWikiAuthImportWatchlist": true,
+ "MediaWikiAuthReattributeEdits": true
},
"MessagesDirs": {
"MediaWikiAuth": [
@@ -23,13 +25,16 @@
},
"AutoloadClasses": {
"MediaWikiAuth\\ExternalWikiPrimaryAuthenticationProvider":
"ExternalWikiPrimaryAuthenticationProvider.php",
- "MediaWikiAuth\\PopulateImportedWatchlistJob":
"PopulateImportedWatchlistJob.php"
+ "MediaWikiAuth\\PopulateImportedWatchlistJob":
"PopulateImportedWatchlistJob.php",
+ "MediaWikiAuth\\ReattributeImportedEdits":
"maintenance/reattributeImportedEdits.php",
+ "MediaWikiAuth\\ReattributeImportedEditsJob":
"ReattributeImportedEditsJob.php"
},
"AvailableRights": [
"mwa-createlocalaccount"
],
"JobClasses": {
- "populateImportedWatchlist":
"MediaWikiAuth\\PopulateImportedWatchlistJob"
+ "populateImportedWatchlist":
"MediaWikiAuth\\PopulateImportedWatchlistJob",
+ "reattributeImportedEdits":
"MediaWikiAuth\\ReattributeImportedEditsJob"
},
"AuthManagerAutoConfig": {
"primaryauth": {
diff --git a/maintenance/reattributeImportedEdits.php
b/maintenance/reattributeImportedEdits.php
new file mode 100644
index 0000000..7fb8c7e
--- /dev/null
+++ b/maintenance/reattributeImportedEdits.php
@@ -0,0 +1,97 @@
+<?php
+
+namespace MediaWikiAuth;
+
+use Wikimedia\Rdbms\Database;
+
+if ( getenv( 'MW_INSTALL_PATH' ) ) {
+ $IP = getenv( 'MW_INSTALL_PATH' );
+} else {
+ $IP = __DIR__ . '/../../..';
+}
+
+require_once "$IP/maintenance/Maintenance.php";
+
+class ReattributeImportedEdits extends \Maintenance {
+ const OPT_USER = 'user';
+
+ public function __construct() {
+ parent::__construct();
+
+ $this->addOption(
+ self::OPT_USER,
+ 'Username to update. If not specified, all users will
be updated.',
+ false, // not required
+ true // requires argument
+ );
+ }
+
+ public function execute() {
+ $dbw = wfGetDB( DB_MASTER );
+ $singleUser = false;
+
+ if ( $this->hasOption( self::OPT_USER ) ) {
+ $user = \User::newFromName( $this->getOption(
self::OPT_USER ) );
+
+ if ( $user === null || $user->getId() === 0 ) {
+ $this->error( "User {$user} does not exist.\n",
1 );
+ return; // never actually get here; error()
calls die()
+ }
+
+ $singleUser = $user->getName();
+ }
+
+ foreach ( self::getTableMetadata() as $table => $metadata ) {
+ foreach ( $metadata[1] as $nameKey => $fields ) {
+ // not every DBMS supports joins on update, and
those that do all
+ // do it in different ways. Subqueries are
therefore more portable.
+ $conds = array_fill_keys( $fields, 0 );
+ $setList = [];
+
+ $subquery = $dbw->selectSQLText(
+ 'user',
+ 'user_id',
+ "user_name = $nameKey",
+ __METHOD__ . ':subquery'
+ );
+
+ if ( $singleUser !== false ) {
+ $conds[$nameKey] = $singleUser;
+ } else {
+ $conds[] = "EXISTS($subquery)";
+ }
+
+ foreach ( $fields as $field ) {
+ $setList[] = "$field = ($subquery)";
+ }
+
+ $this->output( "Updating {$table} (this may
take a few minutes)...\n" );
+ $success = $dbw->update( $table, $setList,
$conds, __METHOD__ . ':update' );
+
+ if ( $success ) {
+ $rows = $dbw->affectedRows();
+ $this->output( "Updated {$rows} records
on {$table}.\n" );
+ } else {
+ $this->error( "Unable to update table
{$table}.\n" );
+ }
+ }
+ }
+ }
+
+ public static function getTableMetadata() {
+ // Note that only tables which are used in the XML dump import
process (plus recentchanges) are updated.
+ return [
+ 'archive' => [ 'ar_id', [ 'ar_user_text' => [ 'ar_user'
] ] ],
+ 'filearchive' => [ 'fa_id', [ 'fa_user_text' => [
'fa_user' ] ] ],
+ // img_name is the PK, and PKs are clustered on InnoDB,
so we can sensibly use BETWEEN
+ 'image' => [ 'img_name', [ 'img_user_text' => [
'img_user' ] ] ],
+ 'logging' => [ 'log_id', [ 'log_user_text' => [
'log_user' ] ] ],
+ 'oldimage' => [ 'oi_name', [ 'oi_user_text' => [
'oi_user' ] ] ],
+ 'recentchanges' => [ 'rc_id', [ 'rc_user_text' => [
'rc_user' ] ] ],
+ 'revision' => [ 'rev_id', [ 'rev_user_text' => [
'rev_user' ] ] ]
+ ];
+ }
+}
+
+$maintClass = 'MediaWikiAuth\ReattributeImportedEdits';
+require_once RUN_MAINTENANCE_IF_MAIN;
--
To view, visit https://gerrit.wikimedia.org/r/394816
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ifd26850a9fdf36deaf3c4408375c1dbd9f80eb86
Gerrit-PatchSet: 3
Gerrit-Project: mediawiki/extensions/MediaWikiAuth
Gerrit-Branch: master
Gerrit-Owner: Skizzerz <[email protected]>
Gerrit-Reviewer: Daniel Friesen <[email protected]>
Gerrit-Reviewer: Legoktm <[email protected]>
Gerrit-Reviewer: Skizzerz <[email protected]>
Gerrit-Reviewer: jenkins-bot <>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits