Brion VIBBER has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/365894 )
Change subject: [WIP] Early versions of 'actor' column migration
......................................................................
[WIP] Early versions of 'actor' column migration
Creates actor table. Very incomplete.
Bug: T167246
Change-Id: I98210ea8e45a0e22ca3e3300e16df2bf69066f74
---
M RELEASE-NOTES-1.30
A includes/Actor.php
M includes/DefaultSettings.php
M includes/Revision.php
M includes/actions/HistoryAction.php
M includes/actions/InfoAction.php
M includes/installer/DatabaseUpdater.php
M includes/installer/MysqlUpdater.php
M includes/installer/SqliteUpdater.php
A maintenance/archives/patch-actor-table.sql
A maintenance/migrateActor.php
11 files changed, 366 insertions(+), 14 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/94/365894/1
diff --git a/RELEASE-NOTES-1.30 b/RELEASE-NOTES-1.30
index 87e6ce5..bd725b9 100644
--- a/RELEASE-NOTES-1.30
+++ b/RELEASE-NOTES-1.30
@@ -41,6 +41,10 @@
enabled by setting $wgUsePigLatinVariant to true.
* Added RecentChangesPurgeRows hook to allow extensions to purge data that
depends on the recentchanges table.
+* Column pairs (*_user, *_user_text) are being replaced with references
+ to a shared 'actor' table in the database.
+** Large wikis may want to use $wgActorTableSchemaMigrationStage to perform
+ the update at leisure.
=== Languages updated in 1.30 ===
diff --git a/includes/Actor.php b/includes/Actor.php
new file mode 100644
index 0000000..debad65
--- /dev/null
+++ b/includes/Actor.php
@@ -0,0 +1,192 @@
+<?php
+
+use Wikimedia\Rdbms\IDatabase;
+
+/**
+ * Value object for one row of the actor table: the row id, the id of the
+ * matching local user (if any) and the text form of the name.
+ */
+class Actor {
+	/**
+	 * The actor_id row id for the actor table.
+	 * @var int
+	 */
+	protected $mId;
+
+	/**
+	 * The actor_user relation to user_id, or null if not a local user.
+	 * @var int|null
+	 */
+	protected $mUserId;
+
+	/**
+	 * The actor_text text form of the name.
+	 * @var string
+	 */
+	protected $mText;
+
+	/**
+	 * Do not construct Actor instances manually; use the ActorStore service.
+	 * @param int $id row id in the actor table
+	 * @param int|null $userId matching user_id in the actor table, or null
+	 * @param string $text text-form name of the user/ip/import
+	 */
+	public function __construct( $id, $userId, $text ) {
+		$this->mId = intval( $id );
+		$this->mUserId = is_null( $userId ) ? null : intval( $userId );
+		$this->mText = strval( $text );
+	}
+
+	/**
+	 * @return int the row id for the actor table
+	 */
+	public function getId() {
+		return $this->mId;
+	}
+
+	/**
+	 * @return int|null user_id of the matching user, or null
+	 */
+	public function getUserId() {
+		return $this->mUserId;
+	}
+
+	/**
+	 * @return string text form of the username/IP/etc
+	 */
+	public function getText() {
+		return $this->mText;
+	}
+
+	/**
+	 * Get a User object for this actor: by user id when the actor has one,
+	 * otherwise by its text name (without name validation).
+	 *
+	 * @return User|bool matching User object, or whatever
+	 *  User::newFromName() returns when it cannot build one
+	 */
+	public function getUser() {
+		// mUserId is the user_id; mId is only this table's own row id and
+		// must never be handed to User::newFromId().
+		if ( $this->mUserId ) {
+			return User::newFromId( $this->mUserId );
+		}
+		return User::newFromName( $this->mText, false );
+	}
+}
+
+/**
+ * Reads and writes rows of the actor table through a database handle and
+ * wraps them in Actor objects.
+ */
+class ActorStore {
+	/**
+	 * Database handle used for every actor query.
+	 * @var IDatabase
+	 */
+	protected $mDb;
+
+	/**
+	 * @param IDatabase $db database handle to read from and write to
+	 */
+	public function __construct( IDatabase $db ) {
+		$this->mDb = $db;
+	}
+
+	/**
+	 * Store an actor record if necessary given an ID and text,
+	 * returning the record with id.
+	 *
+	 * An existing row is looked up by user id when one is given, otherwise
+	 * by text; a new row is inserted only when that lookup finds nothing.
+	 *
+	 * @param int|null $userId the user id; any falsy value means "no local user"
+	 * @param string $text
+	 * @return Actor
+	 * @throws Exception may be thrown on database error
+	 */
+	public function store( $userId, $text ) {
+		// Normalize "no local user" to null so it is written as NULL. A
+		// literal 0 would collide on the unique actor_user index as soon as
+		// a second actor without a user id was inserted.
+		$userId = $userId ?: null;
+
+		$conds = ( $userId !== null )
+			? [ 'actor_user' => $userId ]
+			: [ 'actor_text' => $text ];
+		$existing = $this->fetchByConds( $conds );
+		if ( $existing ) {
+			return $existing;
+		}
+
+		$id = $this->mDb->nextSequenceValue( 'actor_id_seq' );
+		$ok = $this->mDb->insert( 'actor',
+			[
+				'actor_id' => $id,
+				'actor_user' => $userId,
+				'actor_text' => $text
+			],
+			__METHOD__
+		);
+		if ( !$ok ) {
+			throw new MWException( 'unexpected failure inserting actor row' );
+		}
+		return new Actor( $this->mDb->insertId(), $userId, $text );
+	}
+
+	/**
+	 * Provide the columns that would be needed to look up an actor reference
+	 * when manually doing a join to actor table.
+	 *
+	 * @return array of string column names
+	 */
+	public function selectFields() {
+		return [ 'actor_id', 'actor_user', 'actor_text' ];
+	}
+
+	/**
+	 * Create an Actor instance from a database row object.
+	 * @param object $row a DB result row containing necessary fields
+	 * @return Actor
+	 */
+	public function fetchFromRow( $row ) {
+		return new Actor( $row->actor_id, $row->actor_user, $row->actor_text );
+	}
+
+	/**
+	 * Internal wrapper for fetching individual actor references.
+	 *
+	 * @param array $conds
+	 * @return Actor|bool matching Actor instance or false if none found
+	 */
+	protected function fetchByConds( array $conds ) {
+		$row = $this->mDb->selectRow( 'actor',
+			$this->selectFields(),
+			$conds,
+			__METHOD__
+		);
+		return $row ? $this->fetchFromRow( $row ) : false;
+	}
+
+	/**
+	 * Fetch an individual actor reference by id.
+	 *
+	 * @param int $id actor_id row id
+	 * @return Actor|bool matching Actor instance or false if none found
+	 */
+	public function fetchById( $id ) {
+		return $this->fetchByConds( [ 'actor_id' => $id ] );
+	}
+
+	/**
+	 * Fetch an individual actor reference by user id.
+	 *
+	 * @param int $userId value to match against actor_user
+	 * @return Actor|bool matching Actor instance or false if none found
+	 */
+	public function fetchByUserId( $userId ) {
+		return $this->fetchByConds( [ 'actor_user' => $userId ] );
+	}
+
+	/**
+	 * Fetch an individual actor reference by text (name/IP/etc).
+	 *
+	 * @param string $text value to match against actor_text
+	 * @return Actor|bool matching Actor instance or false if none found
+	 */
+	public function fetchByText( $text ) {
+		return $this->fetchByConds( [ 'actor_text' => $text ] );
+	}
+
+	/**
+	 * Fetch an individual actor reference whose text starts with a prefix.
+	 * Only a single row is fetched and no ordering is requested, so when
+	 * several names share the prefix just one of them is returned.
+	 *
+	 * @param string $prefix leading part of actor_text to match
+	 * @return Actor|bool matching Actor instance or false if none found
+	 */
+	public function fetchByPrefix( $prefix ) {
+		$like = $this->mDb->buildLike( $prefix, $this->mDb->anyString() );
+		return $this->fetchByConds( [ 'actor_text ' . $like ] );
+	}
+}
+
+/**
+ * Placeholder class; it declares no members yet.
+ */
+class ActorHelper {
+}
diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index 6ce9a66..b555cc4 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -8688,6 +8688,13 @@
$wgInterwikiPrefixDisplayTypes = [];
/**
+ * Actor table schema migration stage.
+ * @since 1.30
+ * @var int One of the MIGRATION_* constants
+ */
+$wgActorTableSchemaMigrationStage = MIGRATION_OLD;
+
+/**
* For really cool vim folding this needs to be at the end:
* vim: foldmarker=@{,@} foldmethod=marker
* @}
diff --git a/includes/Revision.php b/includes/Revision.php
index c3782ba..023d581 100644
--- a/includes/Revision.php
+++ b/includes/Revision.php
@@ -431,13 +431,25 @@
}
/**
- * Return the value of a select() page conds array for the page table.
+ * Return the value of a select() JOIN conds array for the page table.
* This will assure that the revision(s) are not orphaned from live
pages.
* @since 1.19
* @return array
*/
public static function pageJoinCond() {
return [ 'INNER JOIN', [ 'page_id = rev_page' ] ];
+ }
+
+ /**
+ * Return the value of a select() JOIN conds array for the actor table.
+ * This will get actor table rows for all users and other actor types.
+ *
+ * The condition currently matches on the old revision columns: either
+ * rev_user equals actor_user, or rev_user is 0 and rev_user_text equals
+ * actor_text. Because it is a LEFT JOIN, revisions without a matching
+ * actor row are still returned. The commented-out return at the end of
+ * the body shows the alternative join on rev_actor.
+ * @since 1.30
+ * @return array
+ */
+ public static function actorJoinCond() {
+ return [ 'LEFT JOIN',
+ [ '(rev_user = actor_user) OR ((rev_user = 0) AND
rev_user_text=actor_text)' ] ];
+ //return [ 'LEFT JOIN', [ 'actor_id = rev_actor' ] ];
}
/**
@@ -625,10 +637,14 @@
// Use user_name for users and rev_user_text for IPs...
$this->mUserText = null; // lazy load if left null
- if ( $this->mUser == 0 ) {
- $this->mUserText = $row->rev_user_text; // IP
user
- } elseif ( isset( $row->user_name ) ) {
- $this->mUserText = $row->user_name; //
logged-in user
+ if ( isset( $row->actor_text ) ) {
+ $this->mUserText = $row->actor_text;
+ } else {
+ if ( $this->mUser == 0 ) {
+ $this->mUserText = $row->rev_user_text;
// IP user
+ } elseif ( isset( $row->user_name ) ) {
+ $this->mUserText = $row->user_name; //
logged-in user
+ }
}
$this->mOrigUserText = $row->rev_user_text;
} elseif ( is_array( $row ) ) {
diff --git a/includes/actions/HistoryAction.php
b/includes/actions/HistoryAction.php
index 7460340..d498509 100644
--- a/includes/actions/HistoryAction.php
+++ b/includes/actions/HistoryAction.php
@@ -479,15 +479,15 @@
$batch = new LinkBatch();
$revIds = [];
foreach ( $this->mResult as $row ) {
- if ( $row->rev_parent_id ) {
- $revIds[] = $row->rev_parent_id;
+ $rev = Revision::newFromRow( $row );
+ $parentId = $rev->getParentId();
+ if ( $parentId ) {
+ $revIds[] = $parentId;
}
- if ( !is_null( $row->user_name ) ) {
- $batch->add( NS_USER, $row->user_name );
- $batch->add( NS_USER_TALK, $row->user_name );
- } else { # for anons or usernames of imported revisions
- $batch->add( NS_USER, $row->rev_user_text );
- $batch->add( NS_USER_TALK, $row->rev_user_text
);
+ $name = $rev->getUserText( Revision::FOR_THIS_USER,
$this->getUser() );
+ if ( $name !== '' ) {
+ $batch->add( NS_USER, $name );
+ $batch->add( NS_USER_TALK, $name );
}
}
$this->parentLens = Revision::getParentLengths( $this->mDb,
$revIds );
diff --git a/includes/actions/InfoAction.php b/includes/actions/InfoAction.php
index baec944..8465d1e 100644
--- a/includes/actions/InfoAction.php
+++ b/includes/actions/InfoAction.php
@@ -704,6 +704,8 @@
self::getCacheKey( $cache, $page->getTitle(),
$page->getLatest() ),
WANObjectCache::TTL_WEEK,
function ( $oldValue, &$ttl, &$setOpts ) use ( $page,
$config, $fname ) {
+ global $wgActorTableSchemaMigrationStage;
+
$title = $page->getTitle();
$id = $title->getArticleID();
@@ -737,9 +739,20 @@
if ( $config->get( 'MiserMode' ) ) {
$result['authors'] = 0;
} else {
+ switch (
$wgActorTableSchemaMigrationStage ) {
+ case MIGRATION_OLD:
+ case MIGRATION_WRITE_BOTH:
+ $field =
'rev_user_text';
+ break;
+ case MIGRATION_WRITE_NEW:
+ case MIGRATION_NEW:
+ default:
+ $field = 'rev_actor';
+ }
+
$result['authors'] =
(int)$dbr->selectField(
'revision',
- 'COUNT(DISTINCT rev_user_text)',
+ "COUNT(DISTINCT $field)",
[ 'rev_page' => $id ],
$fname
);
diff --git a/includes/installer/DatabaseUpdater.php
b/includes/installer/DatabaseUpdater.php
index e5cbb7c..0a626c2 100644
--- a/includes/installer/DatabaseUpdater.php
+++ b/includes/installer/DatabaseUpdater.php
@@ -1191,4 +1191,18 @@
$wgContentHandlerUseDB = $this->holdContentHandlerUseDB;
}
}
+
+	/**
+	 * Run the MigrateActor maintenance script as a child task, unless the
+	 * 'MigrateActor' update row already exists.
+	 */
+	protected function migrateActor() {
+		if ( $this->updateRowExists( 'MigrateActor' ) ) {
+			return;
+		}
+
+		$this->output(
+			"Migrating actor refs to the 'actor' table, printing progress markers. For large\n" .
+			"databases, you may want to hit Ctrl-C and do this manually with\n" .
+			"maintenance/migrateActor.php.\n"
+		);
+		$task = $this->maintenance->runChild( 'MigrateActor', 'migrateActor.php' );
+		$task->execute();
+		$this->output( "done.\n" );
+	}
+
}
diff --git a/includes/installer/MysqlUpdater.php
b/includes/installer/MysqlUpdater.php
index adfe2f6..7f4aeac 100644
--- a/includes/installer/MysqlUpdater.php
+++ b/includes/installer/MysqlUpdater.php
@@ -302,6 +302,10 @@
[ 'addField', 'user_groups', 'ug_expiry',
'patch-user_groups-ug_expiry.sql' ],
[ 'addIndex', 'image', 'img_user_timestamp',
'patch-image-user-index-2.sql' ],
[ 'modifyField', 'image', 'img_media_type',
'patch-add-3d.sql' ],
+
+ // 1.30
+ [ 'addTable', 'actor', 'patch-actor-table.sql' ],
+ [ 'migrateActor' ],
];
}
diff --git a/includes/installer/SqliteUpdater.php
b/includes/installer/SqliteUpdater.php
index 9c90283..5ee48b2 100644
--- a/includes/installer/SqliteUpdater.php
+++ b/includes/installer/SqliteUpdater.php
@@ -165,6 +165,10 @@
[ 'addField', 'externallinks', 'el_index_60',
'patch-externallinks-el_index_60.sql' ],
[ 'addField', 'user_groups', 'ug_expiry',
'patch-user_groups-ug_expiry.sql' ],
[ 'addIndex', 'image', 'img_user_timestamp',
'patch-image-user-index-2.sql' ],
+
+ // 1.30
+ [ 'addTable', 'actor', 'patch-actor-table.sql' ],
+ [ 'migrateActor' ],
];
}
diff --git a/maintenance/archives/patch-actor-table.sql
b/maintenance/archives/patch-actor-table.sql
new file mode 100644
index 0000000..500f88c
--- /dev/null
+++ b/maintenance/archives/patch-actor-table.sql
@@ -0,0 +1,42 @@
+--
+-- Revision authorship is represented by an actor row.
+-- This avoids duplicating common usernames/IP addresses
+-- in the revision table, and makes rename processing quicker.
+--
+CREATE TABLE /*_*/actor (
+ -- Unique ID to identify each entry
+ actor_id bigint unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+
+ -- Key to user.user_id of the local user this actor row stands for.
+ -- Stores NULL for anonymous editors and for some mass imports.
+ actor_user int unsigned,
+
+ -- Text username or IP address of the editor.
+ actor_text varchar(255) binary NOT NULL default ''
+) /*$wgDBTableOptions*/;
+
+-- Each user id and each name text may appear in at most one actor row.
+CREATE UNIQUE INDEX /*i*/actor_user ON /*_*/actor (actor_user);
+CREATE UNIQUE INDEX /*i*/actor_text ON /*_*/actor (actor_text);
+
+
+
+
+-- TODO: maybe do these two as additions with temp tables?
+
+-- Add new fields to revision
+--ALTER TABLE revision
+-- ADD COLUMN rev_actor bigint unsigned NOT NULL;
+--CREATE INDEX /*i*/actor_timestamp ON /*_*/revision (rev_actor,rev_timestamp);
+--CREATE INDEX /*i*/page_actor_timestamp ON /*_*/revision (rev_page,rev_actor,rev_timestamp);
+
+-- Add new fields to archive and update some bigints
+--ALTER TABLE archive
+-- ADD COLUMN ar_actor bigint unsigned
+-- AFTER ar_comment;
+
+-- TODO: image
+-- TODO: oldimage
+-- TODO: filearchive
+-- TODO: recentchanges
+-- TODO: logging
diff --git a/maintenance/migrateActor.php b/maintenance/migrateActor.php
new file mode 100644
index 0000000..2a730c7
--- /dev/null
+++ b/maintenance/migrateActor.php
@@ -0,0 +1,56 @@
+<?php
+/**
+ * Migrate actor refs from pre-1.30 columns to the 'actor' table
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+use Wikimedia\Rdbms\IDatabase;
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that migrates actor refs from pre-1.30 columns to the
+ * 'actor' table
+ *
+ * @ingroup Maintenance
+ */
+class MigrateActor extends LoggedUpdateMaintenance {
+	public function __construct() {
+		parent::__construct();
+		$this->addDescription(
+			'Migrates actor refs from pre-1.30 columns to the \'actor\' table'
+		);
+		$this->setBatchSize( 100 );
+	}
+
+	/**
+	 * @return string key identifying this update: the class name
+	 */
+	protected function getUpdateKey() {
+		return __CLASS__;
+	}
+
+	/**
+	 * @return string message to show when the update is skipped
+	 */
+	protected function updateSkippedMessage() {
+		return 'actor already migrated.';
+	}
+
+	/**
+	 * Perform the migration. Not implemented yet: no rows are touched.
+	 *
+	 * NOTE(review): this reports success although nothing has been migrated.
+	 * Presumably the parent class records the update key as done when this
+	 * returns true, which would make later runs skip the real migration;
+	 * confirm before this stub is run against a real wiki.
+	 *
+	 * @return bool always true
+	 */
+	protected function doDBUpdates() {
+		// TODO: do stuff
+		return true;
+	}
+}
+
+// Must name the maintenance class defined in this file.
+$maintClass = "MigrateActor";
+require_once RUN_MAINTENANCE_IF_MAIN;
--
To view, visit https://gerrit.wikimedia.org/r/365894
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I98210ea8e45a0e22ca3e3300e16df2bf69066f74
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Brion VIBBER <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits