Catrope has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/405375 )
Change subject: [WIP] Change tag schema normalization
......................................................................
[WIP] Change tag schema normalization
Sketch of how this would work. Doesn't deal with populating
ct_tag_id yet.
Bug: T185355
Change-Id: Ie3dfa28e40f16061466b7d5954fedda63764b66f
---
M autoload.php
M includes/DefaultSettings.php
M includes/api/ApiQueryTags.php
M includes/changetags/ChangeTags.php
M includes/installer/MssqlUpdater.php
M includes/installer/MysqlUpdater.php
M includes/installer/OracleUpdater.php
M includes/installer/PostgresUpdater.php
M includes/installer/SqliteUpdater.php
D maintenance/archives/patch-change_tag_statistics.sql
A maintenance/archives/patch-ct_tag_id.sql
A maintenance/archives/patch-tag_table.sql
D maintenance/mssql/archives/patch-change_tag_statistics.sql
A maintenance/mssql/archives/patch-ct_tag_id.sql
A maintenance/mssql/archives/patch-tag_table.sql
M maintenance/mssql/tables.sql
D maintenance/oracle/archives/patch-change_tag_statistics.sql
A maintenance/oracle/archives/patch-ct_tag_id.sql
A maintenance/oracle/archives/patch-tag_table.sql
M maintenance/oracle/tables.sql
A maintenance/populateTagTable.php
D maintenance/populateTagUsageStatistics.php
D maintenance/postgres/archives/patch-change_tag_statistics.sql
A maintenance/postgres/archives/patch-ct_tag_id.sql
A maintenance/postgres/archives/patch-tag_table.sql
M maintenance/postgres/tables.sql
M maintenance/tables.sql
27 files changed, 275 insertions(+), 227 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/75/405375/1
diff --git a/autoload.php b/autoload.php
index c392ee8..e15bc23 100644
--- a/autoload.php
+++ b/autoload.php
@@ -1143,7 +1143,7 @@
'PopulateRecentChangesSource' => __DIR__ .
'/maintenance/populateRecentChangesSource.php',
'PopulateRevisionLength' => __DIR__ .
'/maintenance/populateRevisionLength.php',
'PopulateRevisionSha1' => __DIR__ .
'/maintenance/populateRevisionSha1.php',
- 'PopulateTagUsageStatistics' => __DIR__ .
'/maintenance/populateTagUsageStatistics.php',
+ 'PopulateTagTable' => __DIR__ . '/maintenance/populateTagTable.php',
'PostgreSqlLockManager' => __DIR__ .
'/includes/libs/lockmanager/PostgreSqlLockManager.php',
'PostgresInstaller' => __DIR__ .
'/includes/installer/PostgresInstaller.php',
'PostgresUpdater' => __DIR__ .
'/includes/installer/PostgresUpdater.php',
diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index c30f453..3eb5869 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -6966,16 +6966,6 @@
];
/**
- * Temporary feature flag to handle deploy of change_tag_statistics table
- * For a live deploy, the following is recommended:
- * - create the change_tag_statistics table with patch-change_tag_statistics
- * - set $wgUseChangeTagStatisticsTable to 1: update-only mode
- * - run maintenance script populateTagUsageStatistics.php
- * - set $wgUseChangeTagStatisticsTable to 2: full mode
- */
-$wgUseChangeTagStatisticsTable = 0;
-
-/**
* If set to an integer, pages that are watched by this many users or more
* will not require the unwatchedpages permission to view the number of
* watchers.
@@ -8819,6 +8809,21 @@
*/
$wgCommentTableSchemaMigrationStage = MIGRATION_OLD;
+//$wgUseChangeTagStatisticsTable = 0;
+
+/**
+ * change_tag table schema migration stage.
+ *
+ * MIGRATION_OLD: Do not use tag table or ct_tag_id.
+ * MIGRATION_WRITE_BOTH: Write to the tag table and ct_tag_id, but read from
the old schema
+ * MIGRATION_WRITE_NEW: Behaves the same as MIGRATION_WRITE_BOTH
+ * MIGRATION_NEW: Use the tag table and ct_tag_id, do read/write ct_tag
+ *
+ * @since 1.31
+ * @var int One of the MIGRATION_* constants
+ */
+$wgChangeTagsSchemaMigrationStage = MIGRATION_OLD;
+
/**
* For really cool vim folding this needs to be at the end:
* vim: foldmarker=@{,@} foldmethod=marker
diff --git a/includes/api/ApiQueryTags.php b/includes/api/ApiQueryTags.php
index 54fff2a..eca1688 100644
--- a/includes/api/ApiQueryTags.php
+++ b/includes/api/ApiQueryTags.php
@@ -36,7 +36,7 @@
}
public function execute() {
- global $wgUseChangeTagStatisticsTable;
+ global $wgChangeTagsSchemaMigrationStage;
$params = $this->extractRequestParams();
$prop = array_flip( $params['prop'] );
@@ -56,19 +56,19 @@
$explicitlyDefinedTags = array_fill_keys(
ChangeTags::listExplicitlyDefinedTags(), 0 );
$softwareActivatedTags = array_fill_keys(
ChangeTags::listSoftwareActivatedTags(), 0 );
- if ( $wgUseChangeTagStatisticsTable > 1 ) {
- $this->addTables( 'change_tag_statistics' );
- $this->addFields( [ 'cts_tag', 'cts_count',
'cts_timestamp' ] );
+ if ( $wgChangeTagsSchemaMigrationStage === MIGRATION_NEW ) {
+ $this->addTables( 'tag' );
+ $this->addFields( [ 'tag_name', 'tag_count',
'tag_timestamp' ] );
$this->addOption( 'LIMIT', $limit + 1 );
- $this->addWhereRange( 'cts_tag', 'newer',
$params['continue'], null );
+ $this->addWhereRange( 'tag_name', 'newer',
$params['continue'], null );
$res = $this->select( __METHOD__ );
$hitCounts = [];
$timestamps = [];
foreach ( $res as $row ) {
- if ( $row->cts_count ) {
- $hitCounts[$row->cts_tag] =
(int)$row->cts_count;
- $timestamps[$row->cts_tag] =
(string)$row->cts_timestamp;
+ if ( $row->tag_count ) {
+ $hitCounts[$row->tag_name] =
(int)$row->tag_count;
+ $timestamps[$row->tag_name] =
(string)$row->tag_timestamp;
}
}
} else {
diff --git a/includes/changetags/ChangeTags.php
b/includes/changetags/ChangeTags.php
index 7138a4b..5a4c1a3 100644
--- a/includes/changetags/ChangeTags.php
+++ b/includes/changetags/ChangeTags.php
@@ -235,7 +235,7 @@
&$rev_id = null, &$log_id = null, $params = null, RecentChange
$rc = null,
User $user = null
) {
- global $wgUseChangeTagStatisticsTable;
+ global $wgChangeTagsSchemaMigrationStage;
$tagsToAdd = array_filter( (array)$tagsToAdd ); // Make sure
we're submitting all tags...
$tagsToRemove = array_filter( (array)$tagsToRemove );
@@ -326,6 +326,7 @@
$tagRow = array_filter(
[
'ct_tag' => $tag,
+ // TODO ct_tag_id
'ct_rc_id' => $rc_id,
'ct_log_id' => $log_id,
'ct_rev_id' => $rev_id,
@@ -334,14 +335,14 @@
);
$dbw->startAtomic( __METHOD__ );
$dbw->insert( 'change_tag', $tagRow,
__METHOD__, [ 'IGNORE' ] );
- if ( $dbw->affectedRows() &&
$wgUseChangeTagStatisticsTable > 0 ) {
+ if ( $dbw->affectedRows() &&
$wgChangeTagsSchemaMigrationStage > MIGRATION_OLD ) {
// increment tag hitcount and record
timestamp
$timestamp = $dbw->timestamp();
$dbw->upsert(
- 'change_tag_statistics',
- [ 'cts_tag' => $tag,
'cts_count' => 1, 'cts_timestamp' => $timestamp ],
- [ 'cts_tag' ],
- [ 'cts_count = cts_count + 1',
'cts_timestamp' => $timestamp ],
+ 'tag',
+ [ 'tag_name' => $tag,
'tag_count' => 1, 'tag_timestamp' => $timestamp ],
+ [ 'tag_name' ],
+ [ 'tag_count = tag_count + 1',
'tag_timestamp' => $timestamp ],
__METHOD__
);
}
@@ -355,6 +356,7 @@
$conds = array_filter(
[
'ct_tag' => $tag,
+ // TODO ct_tag_id
'ct_rc_id' => $rc_id,
'ct_log_id' => $log_id,
'ct_rev_id' => $rev_id
@@ -362,18 +364,18 @@
);
$dbw->startAtomic( __METHOD__ );
$dbw->delete( 'change_tag', $conds, __METHOD__
);
- if ( $dbw->affectedRows() &&
$wgUseChangeTagStatisticsTable > 0 ) {
+ if ( $dbw->affectedRows() &&
$wgChangeTagsSchemaMigrationStage > MIGRATION_OLD ) {
// decrement tag hitcount
$dbw->update(
- 'change_tag_statistics',
- [ 'cts_count = cts_count - 1' ],
- [ 'cts_tag' => $tag ],
+ 'tag',
+ [ 'tag_count = tag_count - 1' ],
+ [ 'tag_name' => $tag ],
__METHOD__
);
// delete row if it reaches count of
zero
$dbw->delete(
- 'change_tag_statistics',
- [ 'cts_tag' => $tag,
'cts_count' => 0 ],
+ 'tag',
+ [ 'tag_name' => $tag,
'tag_count' => 0 ],
__METHOD__
);
}
@@ -1140,7 +1142,7 @@
* @since 1.25
*/
public static function deleteTagEverywhere( $tag ) {
- global $wgUseChangeTagStatisticsTable;
+ global $wgChangeTagsSchemaMigrationStage;
$dbw = wfGetDB( DB_MASTER );
$dbw->startAtomic( __METHOD__ );
@@ -1163,8 +1165,8 @@
// delete from change_tag
$dbw->delete( 'change_tag', [ 'ct_tag' => $tag ], __METHOD__ );
- if ( $wgUseChangeTagStatisticsTable > 0 ) {
- $dbw->delete( 'change_tag_statistics', [ 'cts_tag' =>
$tag ], __METHOD__ );
+ if ( $wgChangeTagsSchemaMigrationStage > MIGRATION_OLD ) {
+ $dbw->delete( 'tag', [ 'tag_name' => $tag ], __METHOD__
);
}
$dbw->endAtomic( __METHOD__ );
@@ -1315,7 +1317,7 @@
/**
* Basically lists defined tags which count even if they aren't applied
to anything.
* It returns a union of the results of listExplicitlyDefinedTags() and
- * listExtensionDefinedTags().
+ * listSoftwareDefinedTags().
*
* @return string[] Array of strings: tags
*/
@@ -1446,19 +1448,19 @@
$cache->makeKey( 'change-tag-statistics' ),
WANObjectCache::TTL_MINUTE * 5,
function ( $oldValue, &$ttl, array &$setOpts ) use (
$fname ) {
- global $wgUseChangeTagStatisticsTable;
+ global $wgChangeTagsSchemaMigrationStage;
$dbr = wfGetDB( DB_REPLICA, 'vslow' );
$setOpts += Database::getCacheSetOptions( $dbr
);
- if ( $wgUseChangeTagStatisticsTable > 1 ) {
- // fetch from change_tag_statistics
table
+ if ( $wgChangeTagsSchemaMigrationStage ===
MIGRATION_NEW ) {
+ // fetch from tag table
$res = $dbr->select(
- 'change_tag_statistics',
- [ 'cts_tag', 'cts_count' ],
+ 'tag',
+ [ 'tag_name', 'tag_count' ],
[],
$fname,
- [ 'ORDER BY' => 'cts_count
DESC' ]
+ [ 'ORDER BY' => 'tag_count
DESC' ]
);
$out = [];
diff --git a/includes/installer/MssqlUpdater.php
b/includes/installer/MssqlUpdater.php
index 21ce5ab..e258d40 100644
--- a/includes/installer/MssqlUpdater.php
+++ b/includes/installer/MssqlUpdater.php
@@ -107,7 +107,8 @@
[ 'addIndex', 'site_stats', 'PRIMARY',
'patch-site_stats-pk.sql' ],
// 1.31
- [ 'addTable', 'change_tag_statistics',
'patch-change_tag_statistics.sql' ],
+ [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+ [ 'addField', 'change_tag', 'ct_tag_id',
'patch-ct_tag_id.sql' ],
];
}
diff --git a/includes/installer/MysqlUpdater.php
b/includes/installer/MysqlUpdater.php
index 4bdbfa9..fba9bd4 100644
--- a/includes/installer/MysqlUpdater.php
+++ b/includes/installer/MysqlUpdater.php
@@ -331,7 +331,8 @@
[ 'doUnsignedSyncronisation' ],
// 1.31
- [ 'addTable', 'change_tag_statistics',
'patch-change_tag_statistics.sql' ],
+ [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+ [ 'addField', 'change_tag', 'ct_tag_id',
'patch-ct_tag_id.sql' ],
];
}
diff --git a/includes/installer/OracleUpdater.php
b/includes/installer/OracleUpdater.php
index 7b87e0b..6f47f55 100644
--- a/includes/installer/OracleUpdater.php
+++ b/includes/installer/OracleUpdater.php
@@ -128,7 +128,8 @@
[ 'addIndex', 'site_stats', 'PRIMARY',
'patch-site_stats-pk.sql' ],
// 1.31
- [ 'addTable', 'change_tag_statistics',
'patch-change_tag_statistics.sql' ],
+ [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+ [ 'addField', 'change_tag', 'ct_tag_id',
'patch-ct_tag_id.sql' ],
// KEEP THIS AT THE BOTTOM!!
[ 'doRebuildDuplicateFunction' ],
diff --git a/includes/installer/PostgresUpdater.php
b/includes/installer/PostgresUpdater.php
index 9ce3762..fedcde6 100644
--- a/includes/installer/PostgresUpdater.php
+++ b/includes/installer/PostgresUpdater.php
@@ -485,7 +485,8 @@
[ 'addTable', 'ip_changes', 'patch-ip_changes.sql' ],
// 1.31
- [ 'addTable', 'change_tag_statistics',
'patch-change_tag_statistics.sql' ],
+ [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+ [ 'addField', 'change_tag', 'ct_tag_id',
'patch-ct_tag_id.sql' ],
];
}
diff --git a/includes/installer/SqliteUpdater.php
b/includes/installer/SqliteUpdater.php
index aceb394..dcd3f77 100644
--- a/includes/installer/SqliteUpdater.php
+++ b/includes/installer/SqliteUpdater.php
@@ -195,7 +195,8 @@
'patch-l10n_cache-primary-key.sql' ],
// 1.31
- [ 'addTable', 'change_tag_statistics',
'patch-change_tag_statistics.sql' ],
+ [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+ [ 'addField', 'change_tag', 'ct_tag_id',
'patch-ct_tag_id.sql' ],
];
}
diff --git a/maintenance/archives/patch-change_tag_statistics.sql
b/maintenance/archives/patch-change_tag_statistics.sql
deleted file mode 100644
index 2c810bd..0000000
--- a/maintenance/archives/patch-change_tag_statistics.sql
+++ /dev/null
@@ -1,11 +0,0 @@
---
--- This table contains change tags hitcounts extracted from the change_tag
table;
--- as well as the timestamp of last addition.
---
-CREATE TABLE /*_*/change_tag_statistics (
- cts_tag varchar(255) NOT NULL PRIMARY KEY,
- cts_count bigint unsigned NOT NULL default 0,
- cts_timestamp varbinary(14) NULL
-) /*$wgDBTableOptions*/;
-
-CREATE INDEX /*i*/change_tag_statistics_count ON /*_*/change_tag_statistics
(cts_count);
diff --git a/maintenance/archives/patch-ct_tag_id.sql
b/maintenance/archives/patch-ct_tag_id.sql
new file mode 100644
index 0000000..feeaba8
--- /dev/null
+++ b/maintenance/archives/patch-ct_tag_id.sql
@@ -0,0 +1,2 @@
+ALTER TABLE /*_*/change_tags
+ ADD ct_tag_id int unsigned NOT NULL DEFAULT 0;
diff --git a/maintenance/archives/patch-tag_table.sql
b/maintenance/archives/patch-tag_table.sql
new file mode 100644
index 0000000..4eb53f9
--- /dev/null
+++ b/maintenance/archives/patch-tag_table.sql
@@ -0,0 +1,9 @@
+CREATE TABLE /*_*/tag (
+ tag_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+ tag_name varchar(255) NOT NULL,
+ tag_count bigint unsigned NOT NULL default 0,
+ tag_timestamp varbinary(14) NULL
+) /*$wgDBTableOptions*/;
+
+CREATE UNIQUE INDEX /*i*/tag_name ON /*_*/tag (tag_name);
+CREATE INDEX /*i*/tag_count ON /*_*/tag (tag_count);
diff --git a/maintenance/mssql/archives/patch-change_tag_statistics.sql
b/maintenance/mssql/archives/patch-change_tag_statistics.sql
deleted file mode 100644
index e7d9cfb..0000000
--- a/maintenance/mssql/archives/patch-change_tag_statistics.sql
+++ /dev/null
@@ -1,10 +0,0 @@
---
--- This table contains change tags hitcounts extracted from the change_tag
table.
---
-CREATE TABLE /*_*/change_tag_statistics (
- cts_tag nvarchar(255) NOT NULL CONSTRAINT PK_change_tag_statistics PRIMARY
KEY,
- cts_count int NOT NULL CONSTRAINT DF_cts_count DEFAULT 0,
- cts_timestamp nvarchar(14) NULL
-) /*$wgDBTableOptions*/;
-
-CREATE INDEX /*i*/change_tag_statistics_count ON /*_*/change_tag_statistics
(cts_count);
diff --git a/maintenance/mssql/archives/patch-ct_tag_id.sql
b/maintenance/mssql/archives/patch-ct_tag_id.sql
new file mode 100644
index 0000000..fecf027
--- /dev/null
+++ b/maintenance/mssql/archives/patch-ct_tag_id.sql
@@ -0,0 +1,2 @@
+ALTER TABLE /*_*/change_tags
+ ADD ct_tag_id int NULL REFERENCES /*_*/tag(tag_id);
diff --git a/maintenance/mssql/archives/patch-tag_table.sql
b/maintenance/mssql/archives/patch-tag_table.sql
new file mode 100644
index 0000000..0c7b59d
--- /dev/null
+++ b/maintenance/mssql/archives/patch-tag_table.sql
@@ -0,0 +1,10 @@
+-- Table defining tag names for IDs. Also stores hit counts to avoid expensive
queries on change_tag
+CREATE TABLE /*_*/tag (
+ tag_id int NOT NULL PRIMARY KEY IDENTITY,
+ tag_name nvarchar(255) NOT NULL,
+ tag_count int NOT NULL CONSTRAINT DF_tag_count DEFAULT 0,
+ tag_timestamp nvarchar(14) NULL
+) /*$wgDBTableOptions*/;
+
+CREATE UNIQUE INDEX /*i*/tag_name ON /*_*/tag (tag_name);
+CREATE INDEX /*i*/tag_count ON /*_*/tag (tag_count);
diff --git a/maintenance/mssql/tables.sql b/maintenance/mssql/tables.sql
index e395e1f..caf8052 100644
--- a/maintenance/mssql/tables.sql
+++ b/maintenance/mssql/tables.sql
@@ -1195,6 +1195,16 @@
ul_value nvarchar(max)
);
+-- Table defining tag names for IDs. Also stores hit counts to avoid expensive
queries on change_tag
+CREATE TABLE /*_*/tag (
+ tag_id int NOT NULL PRIMARY KEY IDENTITY,
+ tag_name nvarchar(255) NOT NULL,
+ tag_count int NOT NULL CONSTRAINT DF_tag_count DEFAULT 0,
+ tag_timestamp nvarchar(14) NULL
+) /*$wgDBTableOptions*/;
+
+CREATE UNIQUE INDEX /*i*/tag_name ON /*_*/tag (tag_name);
+CREATE INDEX /*i*/tag_count ON /*_*/tag (tag_count);
-- A table to track tags for revisions, logs and recent changes.
CREATE TABLE /*_*/change_tag (
@@ -1205,8 +1215,10 @@
ct_log_id int NULL REFERENCES /*_*/logging(log_id),
-- REVID for the change
ct_rev_id int NULL REFERENCES /*_*/revision(rev_id),
- -- Tag applied
+ -- Tag name (being migrated to ct_tag_id)
ct_tag nvarchar(255) NOT NULL,
+ -- Tag ID
+ ct_tag_id int NULL REFERENCES /*_*/tag(tag_id),
-- Parameters for the tag, presently unused
ct_params nvarchar(max) NULL
);
@@ -1216,15 +1228,6 @@
CREATE UNIQUE INDEX /*i*/change_tag_rev_tag ON /*_*/change_tag
(ct_rev_id,ct_tag);
-- Covering index, so we can pull all the info only out of the index.
CREATE INDEX /*i*/change_tag_tag_id ON /*_*/change_tag
(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
-
-CREATE TABLE /*_*/change_tag_statistics (
- cts_tag nvarchar(255) NOT NULL CONSTRAINT PK_change_tag_statistics PRIMARY
KEY,
- cts_count int NOT NULL CONSTRAINT DF_cts_count DEFAULT 0,
- cts_timestamp nvarchar(14) NULL
-) /*$wgDBTableOptions*/;
-
-CREATE INDEX /*i*/change_tag_statistics_count ON /*_*/change_tag_statistics
(cts_count);
-
-- Rollup table to pull a LIST of tags simply without ugly GROUP_CONCAT
-- that only works on MySQL 4.1+
diff --git a/maintenance/oracle/archives/patch-change_tag_statistics.sql
b/maintenance/oracle/archives/patch-change_tag_statistics.sql
deleted file mode 100644
index 7b89610..0000000
--- a/maintenance/oracle/archives/patch-change_tag_statistics.sql
+++ /dev/null
@@ -1,11 +0,0 @@
---
--- This table contains change tags hitcounts extracted from the change_tag
table.
---
-CREATE TABLE &mw_prefix.change_tag_statistics (
- cts_tag VARCHAR2(255) NOT NULL,
- cts_count NUMBER NOT NULL DEFAULT 0,
- cts_timestamp TIMESTAMP(6) WITH TIME ZONE
-);
-
-ALTER TABLE &mw_prefix.change_tag_statistics ADD CONSTRAINT
&mw_prefix.change_tag_statistics_pk PRIMARY KEY (cts_tag);
-CREATE INDEX &mw_prefix.change_tag_statistics_i01 ON
&mw_prefix.change_tag_statistics (cts_count);
diff --git a/maintenance/oracle/archives/patch-ct_tag_id.sql
b/maintenance/oracle/archives/patch-ct_tag_id.sql
new file mode 100644
index 0000000..f8d6de7
--- /dev/null
+++ b/maintenance/oracle/archives/patch-ct_tag_id.sql
@@ -0,0 +1,5 @@
+define mw_prefix='{$wgDBprefix}';
+
+ALTER TABLE &mw_prefix.change_tag ADD (
+ct_tag_id NUMBER NOT NULL,
+);
diff --git a/maintenance/oracle/archives/patch-tag_table.sql
b/maintenance/oracle/archives/patch-tag_table.sql
new file mode 100644
index 0000000..28d7b2d
--- /dev/null
+++ b/maintenance/oracle/archives/patch-tag_table.sql
@@ -0,0 +1,11 @@
+define mw_prefix='{$wgDBprefix}';
+
+CREATE TABLE &mw_prefix.tag (
+ tag_id NUMBER NOT NULL,
+ tag_name VARCHAR2(255) NOT NULL,
+ tag_count NUMBER NOT NULL DEFAULT 0,
+ tag_timestamp TIMESTAMP(6) WITH TIME ZONE
+);
+
+ALTER TABLE &mw_prefix.tag ADD CONSTRAINT &mw_prefix.tag_pk PRIMARY KEY
(tag_id);
+CREATE INDEX &mw_prefix.tag_i01 ON &mw_prefix.tag (tag_count);
diff --git a/maintenance/oracle/tables.sql b/maintenance/oracle/tables.sql
index 3bbcb02..42fe85c 100644
--- a/maintenance/oracle/tables.sql
+++ b/maintenance/oracle/tables.sql
@@ -739,12 +739,23 @@
);
ALTER TABLE &mw_prefix.updatelog ADD CONSTRAINT &mw_prefix.updatelog_pk
PRIMARY KEY (ul_key);
+CREATE TABLE &mw_prefix.tag (
+ tag_id NUMBER NOT NULL,
+ tag_name VARCHAR2(255) NOT NULL,
+ tag_count NUMBER NOT NULL DEFAULT 0,
+ tag_timestamp TIMESTAMP(6) WITH TIME ZONE
+);
+
+ALTER TABLE &mw_prefix.tag ADD CONSTRAINT &mw_prefix.tag_pk PRIMARY KEY
(tag_id);
+CREATE INDEX &mw_prefix.tag_i01 ON &mw_prefix.tag (tag_count);
+
CREATE TABLE &mw_prefix.change_tag (
ct_id NUMBER NOT NULL,
ct_rc_id NUMBER NULL,
ct_log_id NUMBER NULL,
ct_rev_id NUMBER NULL,
ct_tag VARCHAR2(255) NOT NULL,
+ ct_tag_id NUMBER NOT NULL,
ct_params BLOB NULL
);
ALTER TABLE &mw_prefix.change_tag ADD CONSTRAINT &mw_prefix.change_tag_pk
PRIMARY KEY (ct_id);
@@ -752,15 +763,6 @@
CREATE UNIQUE INDEX &mw_prefix.change_tag_u02 ON &mw_prefix.change_tag
(ct_log_id,ct_tag);
CREATE UNIQUE INDEX &mw_prefix.change_tag_u03 ON &mw_prefix.change_tag
(ct_rev_id,ct_tag);
CREATE INDEX &mw_prefix.change_tag_i01 ON &mw_prefix.change_tag
(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
-
-CREATE TABLE &mw_prefix.change_tag_statistics (
- cts_tag VARCHAR2(255) NOT NULL,
- cts_count NUMBER NOT NULL DEFAULT 0,
- cts_timestamp TIMESTAMP(6) WITH TIME ZONE
-);
-
-ALTER TABLE &mw_prefix.change_tag_statistics ADD CONSTRAINT
&mw_prefix.change_tag_statistics_pk PRIMARY KEY (cts_tag);
-CREATE INDEX &mw_prefix.change_tag_statistics_i01 ON
&mw_prefix.change_tag_statistics (cts_count);
CREATE TABLE &mw_prefix.tag_summary (
ts_id NUMBER NOT NULL,
diff --git a/maintenance/populateTagTable.php b/maintenance/populateTagTable.php
new file mode 100644
index 0000000..57b5734
--- /dev/null
+++ b/maintenance/populateTagTable.php
@@ -0,0 +1,117 @@
+<?php
+/**
+ * Pouplates tag table
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that makes the required database updates for the tag
table to be of any use.
+ *
+ * @ingroup Maintenance
+ */
+class PopulateTagTable extends LoggedUpdateMaintenance {
+ public function __construct() {
+ parent::__construct();
+ $this->addDescription( 'Populates tag table' );
+ }
+
+ protected function getUpdateKey() {
+ return 'populate tag table';
+ }
+
+ protected function updateSkippedMessage() {
+ return 'tag table already populated.';
+ }
+
+ protected function doDBUpdates() {
+ global $wgChangeTagsSchemaMigrationStage;
+ $db = $this->getDB( DB_MASTER );
+ if ( $wgChangeTagsSchemaMigrationStage === MIGRATION_OLD ) {
+ $this->error( "\$wgChangeTagsSchemaMigrationStage must
not be set to MIGRATION_OLD\n" .
+ "Set it to MIGRATION_WRITE_BOTH if this is the
first time you're running this script." );
+ return false;
+ }
+ if ( !$db->tableExists( 'tag' ) ) {
+ $this->error( "tag table does not exist" );
+ return false;
+ }
+ if ( !$db->fieldExists( 'change_tag', 'ct_tag_id' ) ) {
+ $this->error( "change_tag table does not have ct_tag_id
field" );
+ return false;
+ }
+ $this->output( "Populating tag table\n" );
+
+ // fetch tags used on the wiki
+ $res = $db->select(
+ 'change_tag',
+ 'ct_tag',
+ [],
+ __METHOD__,
+ [ 'DISTINCT' ]
+ );
+
+ $tags = 0;
+ $ctRows = 0;
+ foreach ( $res as $row ) {
+ $tag = $row->ct_tag;
+ $db->startAtomic( __METHOD__ );
+ $hitcount = $db->selectRowCount(
+ 'change_tag',
+ '*',
+ [ 'ct_tag' => $tag ],
+ __METHOD__
+ );
+ if ( $hitcount ) {
+ $tags++;
+ // Insert a row with name and hit count
(timestamp is not computed)
+ $row = [ 'tag_name' => $tag, 'tag_count' =>
$hitcount ];
+ $db->insert( 'tag', $row, __METHOD__, [
'IGNORE' ] );
+ if ( $db->affectedRows() > 0 ) {
+ $tagId = $db->insertId();
+ } else {
+ // The row is already there. Replace
it, then query its ID.
+ $db->replace( 'tag', [ 'tag_name' ],
$row );
+ $tagId = $db->selectField( 'tag',
'tag_id', [ 'tag_name' => $tag ] );
+ }
+ }
+ $db->endAtomic( __METHOD__ );
+ if ( $hitcount ) {
+ // Update the ID in the change_tag rows
+ $db->update(
+ 'change_tag',
+ [ 'cts_tag_id' => $tagId ],
+ [ 'cts_tag' => $tag ]
+ );
+ $ctRows += $db->affectedRows();
+ }
+ }
+
+ $this->output(
+ "tag table population complete. {$tags} tags added,
{$ctRows} change_tag rows updated\n"
+ );
+
+ return true;
+ }
+}
+
+$maintClass = "PopulateTagTable";
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/populateTagUsageStatistics.php
b/maintenance/populateTagUsageStatistics.php
deleted file mode 100644
index dbdb2d9..0000000
--- a/maintenance/populateTagUsageStatistics.php
+++ /dev/null
@@ -1,99 +0,0 @@
-<?php
-/**
- * Pouplates change_tag_statistics table
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Maintenance
- */
-
-require_once __DIR__ . '/Maintenance.php';
-
-/**
- * Maintenance script that makes the required database updates for
change_tag_statistics
- * table to be of any use.
- *
- * @ingroup Maintenance
- */
-class PopulateTagUsageStatistics extends LoggedUpdateMaintenance {
- public function __construct() {
- parent::__construct();
- $this->addDescription( 'Populates change_tag_statistics' );
- }
-
- protected function getUpdateKey() {
- return 'populate change_tag_statistics';
- }
-
- protected function updateSkippedMessage() {
- return 'change_tag_statistics table already populated.';
- }
-
- protected function doDBUpdates() {
- $db = $this->getDB( DB_MASTER );
- if ( !$db->tableExists( 'change_tag_statistics' ) ) {
- $this->error( "change_tag_statistics table does not
exist" );
-
- return false;
- }
- $this->output( "Populating change_tag_statistics table\n" );
-
- // fetch tags used on the wiki
- $res = $db->select(
- 'change_tag',
- 'ct_tag',
- [],
- __METHOD__,
- [ 'DISTINCT' ]
- );
-
- $count = 0;
- foreach ( $res as $row ) {
- $tag = $row->ct_tag;
- $db->startAtomic( __METHOD__ );
- // get hitcount
- $hitcount = $db->selectRowCount(
- 'change_tag',
- '*',
- [ 'ct_tag' => $tag ],
- __METHOD__
- );
- if ( $hitcount ) {
- $count++;
- // make sure we have a hitcount in case the tag
was completely removed since line 56
- // record hitcount (replace in case a tag was
inserted since clearing the table)
- // timestamp is not retrieved (too expansive in
the middle of a sensitive operation)
- $db->replace(
- 'change_tag_statistics',
- [ 'cts_tag' ],
- [ 'cts_tag' => $tag, 'cts_count' =>
$hitcount ],
- __METHOD__
- );
- }
- $db->endAtomic( __METHOD__ );
- }
-
- $this->output(
- "change_tag_statistics population complete ...
hitcounts added for {$count} tags\n"
- );
-
- return true;
- }
-}
-
-$maintClass = "PopulateTagUsageStatistics";
-require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/postgres/archives/patch-change_tag_statistics.sql
b/maintenance/postgres/archives/patch-change_tag_statistics.sql
deleted file mode 100644
index b5176cf..0000000
--- a/maintenance/postgres/archives/patch-change_tag_statistics.sql
+++ /dev/null
@@ -1,10 +0,0 @@
---
--- This table contains change tags hitcounts extracted from the change_tag
table.
---
-CREATE TABLE change_tag_statistics (
- cts_tag TEXT NOT NULL PRIMARY KEY,
- cts_count INTEGER NOT NULL DEFAULT 0,
- cts_timestamp TIMESTAMPTZ
-);
-
-CREATE INDEX change_tag_statistics_count ON change_tag_statistics(cts_count);
diff --git a/maintenance/postgres/archives/patch-ct_tag_id.sql
b/maintenance/postgres/archives/patch-ct_tag_id.sql
new file mode 100644
index 0000000..0156a40
--- /dev/null
+++ b/maintenance/postgres/archives/patch-ct_tag_id.sql
@@ -0,0 +1,2 @@
+ALTER TABLE change_tags
+ ADD COLUMN ct_tag_id INTEGER NOT NULL DEFAULT 0;
diff --git a/maintenance/postgres/archives/patch-tag_table.sql
b/maintenance/postgres/archives/patch-tag_table.sql
new file mode 100644
index 0000000..6d27feb
--- /dev/null
+++ b/maintenance/postgres/archives/patch-tag_table.sql
@@ -0,0 +1,9 @@
+CREATE SEQUENCE tag_tag_id_seq;
+CREATE TABLE tag (
+ tag_id INTEGER NOT NULL PRIMARY KEY DEFAULT
nextval('tag_tag_id_seq'),
+ tag_name TEXT NOT NULL,
+ tag_count INTEGER NOT NULL DEFAULT 0,
+ tag_timestamp TIMESTAMPTZ NULL
+);
+CREATE UNIQUE INDEX tag_name_idx ON tag(tag_name);
+CREATE INDEX tag_count_idx ON tag(tag_count);
diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql
index 2e3e811..8a3fb7f 100644
--- a/maintenance/postgres/tables.sql
+++ b/maintenance/postgres/tables.sql
@@ -26,6 +26,7 @@
DROP SEQUENCE IF EXISTS archive_ar_id_seq CASCADE;
DROP SEQUENCE IF EXISTS externallinks_el_id_seq CASCADE;
DROP SEQUENCE IF EXISTS sites_site_id_seq CASCADE;
+DROP SEQUENCE IF EXISTS tag_tag_id_seq CASCADE;
DROP SEQUENCE IF EXISTS change_tag_ct_id_seq CASCADE;
DROP SEQUENCE IF EXISTS tag_summary_ts_id_seq CASCADE;
DROP FUNCTION IF EXISTS page_deleted() CASCADE;
@@ -704,6 +705,16 @@
CREATE UNIQUE INDEX category_title ON category(cat_title);
CREATE INDEX category_pages ON category(cat_pages);
+CREATE SEQUENCE tag_tag_id_seq;
+CREATE TABLE tag (
+ tag_id INTEGER NOT NULL PRIMARY KEY DEFAULT
nextval('tag_tag_id_seq'),
+ tag_name TEXT NOT NULL,
+ tag_count INTEGER NOT NULL DEFAULT 0,
+ tag_timestamp TIMESTAMPTZ NULL
+);
+CREATE UNIQUE INDEX tag_name_idx ON tag(tag_name);
+CREATE INDEX tag_count_idx ON tag(tag_count);
+
CREATE SEQUENCE change_tag_ct_id_seq;
CREATE TABLE change_tag (
ct_id INTEGER NOT NULL PRIMARY KEY DEFAULT
nextval('change_tag_ct_id_seq'),
@@ -711,20 +722,13 @@
ct_log_id INTEGER NULL,
ct_rev_id INTEGER NULL,
ct_tag TEXT NOT NULL,
+ ct_tag_id INTEGER NOT NULL DEFAULT 0,
ct_params TEXT NULL
);
CREATE UNIQUE INDEX change_tag_rc_tag ON change_tag(ct_rc_id,ct_tag);
CREATE UNIQUE INDEX change_tag_log_tag ON change_tag(ct_log_id,ct_tag);
CREATE UNIQUE INDEX change_tag_rev_tag ON change_tag(ct_rev_id,ct_tag);
CREATE INDEX change_tag_tag_id ON
change_tag(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
-
-CREATE TABLE change_tag_statistics (
- cts_tag TEXT NOT NULL PRIMARY KEY,
- cts_count INTEGER NOT NULL DEFAULT 0,
- cts_timestamp TIMESTAMPTZ
-);
-
-CREATE INDEX change_tag_statistics_count ON change_tag_statistics(cts_count);
CREATE SEQUENCE tag_summary_ts_id_seq;
CREATE TABLE tag_summary (
diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index 8945c3b..06cfc26 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -1685,8 +1685,18 @@
ul_value blob
) /*$wgDBTableOptions*/;
+-- Table defining tag names for IDs. Also stores hit counts to avoid expensive
queries on change_tag
+CREATE TABLE /*_*/tag (
+ tag_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+ tag_name varchar(255) NOT NULL,
+ tag_count bigint unsigned NOT NULL default 0,
+ tag_timestamp varbinary(14) NULL
+) /*$wgDBTableOptions*/;
--- A table to track tags for revisions, logs and recent changes.
+CREATE UNIQUE INDEX /*i*/tag_name ON /*_*/tag (tag_name);
+CREATE INDEX /*i*/tag_count ON /*_*/tag (tag_count);
+
+-- A table to track which revisions, logs and recent changes are tagged with
what tag.
CREATE TABLE /*_*/change_tag (
ct_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
-- RCID for the change
@@ -1695,8 +1705,10 @@
ct_log_id int unsigned NULL,
-- REVID for the change
ct_rev_id int unsigned NULL,
- -- Tag applied
+ -- Tag name (being migrated to ct_tag_id)
ct_tag varchar(255) NOT NULL,
+ -- Tag ID (foreign key to tag.tag_id)
+ ct_tag_id int unsigned NOT NULL,
-- Parameters for the tag, presently unused
ct_params blob NULL
) /*$wgDBTableOptions*/;
@@ -1706,17 +1718,6 @@
CREATE UNIQUE INDEX /*i*/change_tag_rev_tag ON /*_*/change_tag
(ct_rev_id,ct_tag);
-- Covering index, so we can pull all the info only out of the index.
CREATE INDEX /*i*/change_tag_tag_id ON /*_*/change_tag
(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
-
--- Table giving hit counts and timestamp of last addition for tags,
--- to avoid expansive queries on change_tag.
-CREATE TABLE /*_*/change_tag_statistics (
- cts_tag varchar(255) NOT NULL PRIMARY KEY,
- cts_count bigint unsigned NOT NULL default 0,
- cts_timestamp varbinary(14) NULL
-) /*$wgDBTableOptions*/;
-
--- Index so we can sort by count
-CREATE INDEX /*i*/change_tag_statistics_count ON /*_*/change_tag_statistics
(cts_count);
-- Rollup table to pull a LIST of tags simply without ugly GROUP_CONCAT
-- that only works on MySQL 4.1+
--
To view, visit https://gerrit.wikimedia.org/r/405375
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie3dfa28e40f16061466b7d5954fedda63764b66f
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Catrope <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits