Catrope has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/405375 )

Change subject: [WIP] Change tag schema normalization
......................................................................

[WIP] Change tag schema normalization

Sketch of how this would work. Doesn't deal with populating
ct_tag_id yet.

Bug: T185355
Change-Id: Ie3dfa28e40f16061466b7d5954fedda63764b66f
---
M autoload.php
M includes/DefaultSettings.php
M includes/api/ApiQueryTags.php
M includes/changetags/ChangeTags.php
M includes/installer/MssqlUpdater.php
M includes/installer/MysqlUpdater.php
M includes/installer/OracleUpdater.php
M includes/installer/PostgresUpdater.php
M includes/installer/SqliteUpdater.php
D maintenance/archives/patch-change_tag_statistics.sql
A maintenance/archives/patch-ct_tag_id.sql
A maintenance/archives/patch-tag_table.sql
D maintenance/mssql/archives/patch-change_tag_statistics.sql
A maintenance/mssql/archives/patch-ct_tag_id.sql
A maintenance/mssql/archives/patch-tag_table.sql
M maintenance/mssql/tables.sql
D maintenance/oracle/archives/patch-change_tag_statistics.sql
A maintenance/oracle/archives/patch-ct_tag_id.sql
A maintenance/oracle/archives/patch-tag_table.sql
M maintenance/oracle/tables.sql
A maintenance/populateTagTable.php
D maintenance/populateTagUsageStatistics.php
D maintenance/postgres/archives/patch-change_tag_statistics.sql
A maintenance/postgres/archives/patch-ct_tag_id.sql
A maintenance/postgres/archives/patch-tag_table.sql
M maintenance/postgres/tables.sql
M maintenance/tables.sql
27 files changed, 275 insertions(+), 227 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/75/405375/1

diff --git a/autoload.php b/autoload.php
index c392ee8..e15bc23 100644
--- a/autoload.php
+++ b/autoload.php
@@ -1143,7 +1143,7 @@
        'PopulateRecentChangesSource' => __DIR__ . 
'/maintenance/populateRecentChangesSource.php',
        'PopulateRevisionLength' => __DIR__ . 
'/maintenance/populateRevisionLength.php',
        'PopulateRevisionSha1' => __DIR__ . 
'/maintenance/populateRevisionSha1.php',
-       'PopulateTagUsageStatistics' => __DIR__ . 
'/maintenance/populateTagUsageStatistics.php',
+       'PopulateTagTable' => __DIR__ . '/maintenance/populateTagTable.php',
        'PostgreSqlLockManager' => __DIR__ . 
'/includes/libs/lockmanager/PostgreSqlLockManager.php',
        'PostgresInstaller' => __DIR__ . 
'/includes/installer/PostgresInstaller.php',
        'PostgresUpdater' => __DIR__ . 
'/includes/installer/PostgresUpdater.php',
diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index c30f453..3eb5869 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -6966,16 +6966,6 @@
 ];
 
 /**
- * Temporary feature flag to handle deploy of change_tag_statistics table
- * For a live deploy, the following is recommended:
- * - create the change_tag_statistics table with patch-change_tag_statistics
- * - set $wgUseChangeTagStatisticsTable to 1: update-only mode
- * - run maintenance script populateTagUsageStatistics.php
- * - set $wgUseChangeTagStatisticsTable to 2: full mode
- */
-$wgUseChangeTagStatisticsTable = 0;
-
-/**
  * If set to an integer, pages that are watched by this many users or more
  * will not require the unwatchedpages permission to view the number of
  * watchers.
@@ -8819,6 +8809,21 @@
  */
 $wgCommentTableSchemaMigrationStage = MIGRATION_OLD;
 
+//$wgUseChangeTagStatisticsTable = 0;
+
+/**
+ * change_tag table schema migration stage.
+ *
+ * MIGRATION_OLD: Do not use tag table or ct_tag_id.
+ * MIGRATION_WRITE_BOTH: Write to the tag table and ct_tag_id, but read from 
the old schema
+ * MIGRATION_WRITE_NEW: Behaves the same as MIGRATION_WRITE_BOTH
+ * MIGRATION_NEW: Use the tag table and ct_tag_id, do read/write ct_tag
+ *
+ * @since 1.31
+ * @var int One of the MIGRATION_* constants
+ */
+$wgChangeTagsSchemaMigrationStage = MIGRATION_OLD;
+
 /**
  * For really cool vim folding this needs to be at the end:
  * vim: foldmarker=@{,@} foldmethod=marker
diff --git a/includes/api/ApiQueryTags.php b/includes/api/ApiQueryTags.php
index 54fff2a..eca1688 100644
--- a/includes/api/ApiQueryTags.php
+++ b/includes/api/ApiQueryTags.php
@@ -36,7 +36,7 @@
        }
 
        public function execute() {
-               global $wgUseChangeTagStatisticsTable;
+               global $wgChangeTagsSchemaMigrationStage;
                $params = $this->extractRequestParams();
 
                $prop = array_flip( $params['prop'] );
@@ -56,19 +56,19 @@
                $explicitlyDefinedTags = array_fill_keys( 
ChangeTags::listExplicitlyDefinedTags(), 0 );
                $softwareActivatedTags = array_fill_keys( 
ChangeTags::listSoftwareActivatedTags(), 0 );
 
-               if ( $wgUseChangeTagStatisticsTable > 1 ) {
-                       $this->addTables( 'change_tag_statistics' );
-                       $this->addFields( [ 'cts_tag', 'cts_count', 
'cts_timestamp' ] );
+               if ( $wgChangeTagsSchemaMigrationStage === MIGRATION_NEW ) {
+                       $this->addTables( 'tag' );
+                       $this->addFields( [ 'tag_name', 'tag_count', 
'tag_timestamp' ] );
                        $this->addOption( 'LIMIT', $limit + 1 );
-                       $this->addWhereRange( 'cts_tag', 'newer', 
$params['continue'], null );
+                       $this->addWhereRange( 'tag_name', 'newer', 
$params['continue'], null );
                        $res = $this->select( __METHOD__ );
 
                        $hitCounts = [];
                        $timestamps = [];
                        foreach ( $res as $row ) {
-                               if ( $row->cts_count ) {
-                                       $hitCounts[$row->cts_tag] = 
(int)$row->cts_count;
-                                       $timestamps[$row->cts_tag] = 
(string)$row->cts_timestamp;
+                               if ( $row->tag_count ) {
+                                       $hitCounts[$row->tag_name] = 
(int)$row->tag_count;
+                                       $timestamps[$row->tag_name] = 
(string)$row->tag_timestamp;
                                }
                        }
                } else {
diff --git a/includes/changetags/ChangeTags.php 
b/includes/changetags/ChangeTags.php
index 7138a4b..5a4c1a3 100644
--- a/includes/changetags/ChangeTags.php
+++ b/includes/changetags/ChangeTags.php
@@ -235,7 +235,7 @@
                &$rev_id = null, &$log_id = null, $params = null, RecentChange 
$rc = null,
                User $user = null
        ) {
-               global $wgUseChangeTagStatisticsTable;
+               global $wgChangeTagsSchemaMigrationStage;
 
                $tagsToAdd = array_filter( (array)$tagsToAdd ); // Make sure 
we're submitting all tags...
                $tagsToRemove = array_filter( (array)$tagsToRemove );
@@ -326,6 +326,7 @@
                                $tagRow = array_filter(
                                        [
                                                'ct_tag' => $tag,
+                                               // TODO ct_tag_id
                                                'ct_rc_id' => $rc_id,
                                                'ct_log_id' => $log_id,
                                                'ct_rev_id' => $rev_id,
@@ -334,14 +335,14 @@
                                );
                                $dbw->startAtomic( __METHOD__ );
                                $dbw->insert( 'change_tag', $tagRow, 
__METHOD__, [ 'IGNORE' ] );
-                               if ( $dbw->affectedRows() && 
$wgUseChangeTagStatisticsTable > 0 ) {
+                               if ( $dbw->affectedRows() && 
$wgChangeTagsSchemaMigrationStage > MIGRATION_OLD ) {
                                        // increment tag hitcount and record 
timestamp
                                        $timestamp = $dbw->timestamp();
                                        $dbw->upsert(
-                                               'change_tag_statistics',
-                                               [ 'cts_tag' => $tag, 
'cts_count' => 1, 'cts_timestamp' => $timestamp ],
-                                               [ 'cts_tag' ],
-                                               [ 'cts_count = cts_count + 1', 
'cts_timestamp' => $timestamp ],
+                                               'tag',
+                                               [ 'tag_name' => $tag, 
'tag_count' => 1, 'tag_timestamp' => $timestamp ],
+                                               [ 'tag_name' ],
+                                               [ 'tag_count = tag_count + 1', 
'tag_timestamp' => $timestamp ],
                                                __METHOD__
                                        );
                                }
@@ -355,6 +356,7 @@
                                $conds = array_filter(
                                        [
                                                'ct_tag' => $tag,
+                                               // TODO ct_tag_id
                                                'ct_rc_id' => $rc_id,
                                                'ct_log_id' => $log_id,
                                                'ct_rev_id' => $rev_id
@@ -362,18 +364,18 @@
                                );
                                $dbw->startAtomic( __METHOD__ );
                                $dbw->delete( 'change_tag', $conds, __METHOD__ 
);
-                               if ( $dbw->affectedRows() && 
$wgUseChangeTagStatisticsTable > 0 ) {
+                               if ( $dbw->affectedRows() && 
$wgChangeTagsSchemaMigrationStage > MIGRATION_OLD ) {
                                        // decrement tag hitcount
                                        $dbw->update(
-                                               'change_tag_statistics',
-                                               [ 'cts_count = cts_count - 1' ],
-                                               [ 'cts_tag' => $tag ],
+                                               'tag',
+                                               [ 'tag_count = tag_count - 1' ],
+                                               [ 'tag_name' => $tag ],
                                                __METHOD__
                                        );
                                        // delete row if it reaches count of 
zero
                                        $dbw->delete(
-                                               'change_tag_statistics',
-                                               [ 'cts_tag' => $tag, 
'cts_count' => 0 ],
+                                               'tag',
+                                               [ 'tag_name' => $tag, 
'tag_count' => 0 ],
                                                __METHOD__
                                        );
                                }
@@ -1140,7 +1142,7 @@
         * @since 1.25
         */
        public static function deleteTagEverywhere( $tag ) {
-               global $wgUseChangeTagStatisticsTable;
+               global $wgChangeTagsSchemaMigrationStage;
                $dbw = wfGetDB( DB_MASTER );
                $dbw->startAtomic( __METHOD__ );
 
@@ -1163,8 +1165,8 @@
                // delete from change_tag
                $dbw->delete( 'change_tag', [ 'ct_tag' => $tag ], __METHOD__ );
 
-               if ( $wgUseChangeTagStatisticsTable > 0 ) {
-                       $dbw->delete( 'change_tag_statistics', [ 'cts_tag' => 
$tag ], __METHOD__ );
+               if ( $wgChangeTagsSchemaMigrationStage > MIGRATION_OLD ) {
+                       $dbw->delete( 'tag', [ 'tag_name' => $tag ], __METHOD__ 
);
                }
 
                $dbw->endAtomic( __METHOD__ );
@@ -1315,7 +1317,7 @@
        /**
         * Basically lists defined tags which count even if they aren't applied 
to anything.
         * It returns a union of the results of listExplicitlyDefinedTags() and
-        * listExtensionDefinedTags().
+        * listSoftwareDefinedTags().
         *
         * @return string[] Array of strings: tags
         */
@@ -1446,19 +1448,19 @@
                        $cache->makeKey( 'change-tag-statistics' ),
                        WANObjectCache::TTL_MINUTE * 5,
                        function ( $oldValue, &$ttl, array &$setOpts ) use ( 
$fname ) {
-                               global $wgUseChangeTagStatisticsTable;
+                               global $wgChangeTagsSchemaMigrationStage;
                                $dbr = wfGetDB( DB_REPLICA, 'vslow' );
 
                                $setOpts += Database::getCacheSetOptions( $dbr 
);
 
-                               if ( $wgUseChangeTagStatisticsTable > 1 ) {
-                                       // fetch from change_tag_statistics 
table
+                               if ( $wgChangeTagsSchemaMigrationStage === 
MIGRATION_NEW ) {
+                                       // fetch from tag table
                                        $res = $dbr->select(
-                                               'change_tag_statistics',
-                                               [ 'cts_tag', 'cts_count' ],
+                                               'tag',
+                                               [ 'tag_name', 'tag_count' ],
                                                [],
                                                $fname,
-                                               [ 'ORDER BY' => 'cts_count 
DESC' ]
+                                               [ 'ORDER BY' => 'tag_count 
DESC' ]
                                        );
 
                                        $out = [];
diff --git a/includes/installer/MssqlUpdater.php 
b/includes/installer/MssqlUpdater.php
index 21ce5ab..e258d40 100644
--- a/includes/installer/MssqlUpdater.php
+++ b/includes/installer/MssqlUpdater.php
@@ -107,7 +107,8 @@
                        [ 'addIndex', 'site_stats', 'PRIMARY', 
'patch-site_stats-pk.sql' ],
 
                        // 1.31
-                       [ 'addTable', 'change_tag_statistics', 
'patch-change_tag_statistics.sql' ],
+                       [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+                       [ 'addField', 'change_tag', 'ct_tag_id', 
'patch-ct_tag_id.sql' ],
                ];
        }
 
diff --git a/includes/installer/MysqlUpdater.php 
b/includes/installer/MysqlUpdater.php
index 4bdbfa9..fba9bd4 100644
--- a/includes/installer/MysqlUpdater.php
+++ b/includes/installer/MysqlUpdater.php
@@ -331,7 +331,8 @@
                        [ 'doUnsignedSyncronisation' ],
 
                        // 1.31
-                       [ 'addTable', 'change_tag_statistics', 
'patch-change_tag_statistics.sql' ],
+                       [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+                       [ 'addField', 'change_tag', 'ct_tag_id', 
'patch-ct_tag_id.sql' ],
                ];
        }
 
diff --git a/includes/installer/OracleUpdater.php 
b/includes/installer/OracleUpdater.php
index 7b87e0b..6f47f55 100644
--- a/includes/installer/OracleUpdater.php
+++ b/includes/installer/OracleUpdater.php
@@ -128,7 +128,8 @@
                        [ 'addIndex', 'site_stats', 'PRIMARY', 
'patch-site_stats-pk.sql' ],
 
                        // 1.31
-                       [ 'addTable', 'change_tag_statistics', 
'patch-change_tag_statistics.sql' ],
+                       [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+                       [ 'addField', 'change_tag', 'ct_tag_id', 
'patch-ct_tag_id.sql' ],
 
                        // KEEP THIS AT THE BOTTOM!!
                        [ 'doRebuildDuplicateFunction' ],
diff --git a/includes/installer/PostgresUpdater.php 
b/includes/installer/PostgresUpdater.php
index 9ce3762..fedcde6 100644
--- a/includes/installer/PostgresUpdater.php
+++ b/includes/installer/PostgresUpdater.php
@@ -485,7 +485,8 @@
                        [ 'addTable', 'ip_changes', 'patch-ip_changes.sql' ],
 
                        // 1.31
-                       [ 'addTable', 'change_tag_statistics', 
'patch-change_tag_statistics.sql' ],
+                       [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+                       [ 'addField', 'change_tag', 'ct_tag_id', 
'patch-ct_tag_id.sql' ],
                ];
        }
 
diff --git a/includes/installer/SqliteUpdater.php 
b/includes/installer/SqliteUpdater.php
index aceb394..dcd3f77 100644
--- a/includes/installer/SqliteUpdater.php
+++ b/includes/installer/SqliteUpdater.php
@@ -195,7 +195,8 @@
                                'patch-l10n_cache-primary-key.sql' ],
 
                        // 1.31
-                       [ 'addTable', 'change_tag_statistics', 
'patch-change_tag_statistics.sql' ],
+                       [ 'addTable', 'tag', 'patch-tag_table.sql' ],
+                       [ 'addField', 'change_tag', 'ct_tag_id', 
'patch-ct_tag_id.sql' ],
                ];
        }
 
diff --git a/maintenance/archives/patch-change_tag_statistics.sql 
b/maintenance/archives/patch-change_tag_statistics.sql
deleted file mode 100644
index 2c810bd..0000000
--- a/maintenance/archives/patch-change_tag_statistics.sql
+++ /dev/null
@@ -1,11 +0,0 @@
---
--- This table contains change tags hitcounts extracted from the change_tag 
table;
--- as well as the timestamp of last addition.
---
-CREATE TABLE /*_*/change_tag_statistics (
-  cts_tag varchar(255) NOT NULL PRIMARY KEY,
-  cts_count bigint unsigned NOT NULL default 0,
-  cts_timestamp varbinary(14) NULL
-) /*$wgDBTableOptions*/;
-
-CREATE INDEX /*i*/change_tag_statistics_count ON /*_*/change_tag_statistics 
(cts_count);
diff --git a/maintenance/archives/patch-ct_tag_id.sql 
b/maintenance/archives/patch-ct_tag_id.sql
new file mode 100644
index 0000000..feeaba8
--- /dev/null
+++ b/maintenance/archives/patch-ct_tag_id.sql
@@ -0,0 +1,2 @@
+ALTER TABLE /*_*/change_tags
+    ADD ct_tag_id int unsigned NOT NULL DEFAULT 0;
diff --git a/maintenance/archives/patch-tag_table.sql 
b/maintenance/archives/patch-tag_table.sql
new file mode 100644
index 0000000..4eb53f9
--- /dev/null
+++ b/maintenance/archives/patch-tag_table.sql
@@ -0,0 +1,9 @@
+CREATE TABLE /*_*/tag (
+    tag_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+    tag_name varchar(255) NOT NULL,
+    tag_count bigint unsigned NOT NULL default 0,
+    tag_timestamp varbinary(14) NULL
+) /*$wgDBTableOptions*/;
+
+CREATE UNIQUE INDEX /*i*/tag_name ON /*_*/tag (tag_name);
+CREATE INDEX /*i*/tag_count ON /*_*/tag (tag_count);
diff --git a/maintenance/mssql/archives/patch-change_tag_statistics.sql 
b/maintenance/mssql/archives/patch-change_tag_statistics.sql
deleted file mode 100644
index e7d9cfb..0000000
--- a/maintenance/mssql/archives/patch-change_tag_statistics.sql
+++ /dev/null
@@ -1,10 +0,0 @@
---
--- This table contains change tags hitcounts extracted from the change_tag 
table.
---
-CREATE TABLE /*_*/change_tag_statistics (
-  cts_tag nvarchar(255) NOT NULL CONSTRAINT PK_change_tag_statistics PRIMARY 
KEY,
-  cts_count int NOT NULL CONSTRAINT DF_cts_count DEFAULT 0,
-  cts_timestamp nvarchar(14) NULL
-) /*$wgDBTableOptions*/;
-
-CREATE INDEX /*i*/change_tag_statistics_count ON /*_*/change_tag_statistics 
(cts_count);
diff --git a/maintenance/mssql/archives/patch-ct_tag_id.sql 
b/maintenance/mssql/archives/patch-ct_tag_id.sql
new file mode 100644
index 0000000..fecf027
--- /dev/null
+++ b/maintenance/mssql/archives/patch-ct_tag_id.sql
@@ -0,0 +1,2 @@
+ALTER TABLE /*_*/change_tags
+    ADD ct_tag_id int NULL REFERENCES /*_*/tag(tag_id);
diff --git a/maintenance/mssql/archives/patch-tag_table.sql 
b/maintenance/mssql/archives/patch-tag_table.sql
new file mode 100644
index 0000000..0c7b59d
--- /dev/null
+++ b/maintenance/mssql/archives/patch-tag_table.sql
@@ -0,0 +1,10 @@
+-- Table defining tag names for IDs. Also stores hit counts to avoid expensive 
queries on change_tag
+CREATE TABLE /*_*/tag (
+  tag_id int NOT NULL PRIMARY KEY IDENTITY,
+  tag_name nvarchar(255) NOT NULL,
+  tag_count int NOT NULL CONSTRAINT DF_tag_count DEFAULT 0,
+  tag_timestamp nvarchar(14) NULL
+) /*$wgDBTableOptions*/;
+
+CREATE UNIQUE INDEX /*i*/tag_name ON /*_*/tag (tag_name);
+CREATE INDEX /*i*/tag_count ON /*_*/tag (tag_count);
diff --git a/maintenance/mssql/tables.sql b/maintenance/mssql/tables.sql
index e395e1f..caf8052 100644
--- a/maintenance/mssql/tables.sql
+++ b/maintenance/mssql/tables.sql
@@ -1195,6 +1195,16 @@
   ul_value nvarchar(max)
 );
 
+-- Table defining tag names for IDs. Also stores hit counts to avoid expensive 
queries on change_tag
+CREATE TABLE /*_*/tag (
+  tag_id int NOT NULL PRIMARY KEY IDENTITY,
+  tag_name nvarchar(255) NOT NULL,
+  tag_count int NOT NULL CONSTRAINT DF_tag_count DEFAULT 0,
+  tag_timestamp nvarchar(14) NULL
+) /*$wgDBTableOptions*/;
+
+CREATE UNIQUE INDEX /*i*/tag_name ON /*_*/tag (tag_name);
+CREATE INDEX /*i*/tag_count ON /*_*/tag (tag_count);
 
 -- A table to track tags for revisions, logs and recent changes.
 CREATE TABLE /*_*/change_tag (
@@ -1205,8 +1215,10 @@
   ct_log_id int NULL REFERENCES /*_*/logging(log_id),
   -- REVID for the change
   ct_rev_id int NULL REFERENCES /*_*/revision(rev_id),
-  -- Tag applied
+  -- Tag name (being migrated to ct_tag_id)
   ct_tag nvarchar(255) NOT NULL,
+  -- Tag ID
+  ct_tag_id int NULL REFERENCES /*_*/tag(tag_id),
   -- Parameters for the tag, presently unused
   ct_params nvarchar(max) NULL
 );
@@ -1216,15 +1228,6 @@
 CREATE UNIQUE INDEX /*i*/change_tag_rev_tag ON /*_*/change_tag 
(ct_rev_id,ct_tag);
 -- Covering index, so we can pull all the info only out of the index.
 CREATE INDEX /*i*/change_tag_tag_id ON /*_*/change_tag 
(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
-
-CREATE TABLE /*_*/change_tag_statistics (
-  cts_tag nvarchar(255) NOT NULL CONSTRAINT PK_change_tag_statistics PRIMARY 
KEY,
-  cts_count int NOT NULL CONSTRAINT DF_cts_count DEFAULT 0,
-  cts_timestamp nvarchar(14) NULL
-) /*$wgDBTableOptions*/;
-
-CREATE INDEX /*i*/change_tag_statistics_count ON /*_*/change_tag_statistics 
(cts_count);
-
 
 -- Rollup table to pull a LIST of tags simply without ugly GROUP_CONCAT
 -- that only works on MySQL 4.1+
diff --git a/maintenance/oracle/archives/patch-change_tag_statistics.sql 
b/maintenance/oracle/archives/patch-change_tag_statistics.sql
deleted file mode 100644
index 7b89610..0000000
--- a/maintenance/oracle/archives/patch-change_tag_statistics.sql
+++ /dev/null
@@ -1,11 +0,0 @@
---
--- This table contains change tags hitcounts extracted from the change_tag 
table.
---
-CREATE TABLE &mw_prefix.change_tag_statistics (
-  cts_tag VARCHAR2(255) NOT NULL,
-  cts_count NUMBER NOT NULL DEFAULT 0,
-  cts_timestamp TIMESTAMP(6) WITH TIME ZONE
-);
-
-ALTER TABLE &mw_prefix.change_tag_statistics ADD CONSTRAINT 
&mw_prefix.change_tag_statistics_pk PRIMARY KEY (cts_tag);
-CREATE INDEX &mw_prefix.change_tag_statistics_i01 ON 
&mw_prefix.change_tag_statistics (cts_count);
diff --git a/maintenance/oracle/archives/patch-ct_tag_id.sql 
b/maintenance/oracle/archives/patch-ct_tag_id.sql
new file mode 100644
index 0000000..f8d6de7
--- /dev/null
+++ b/maintenance/oracle/archives/patch-ct_tag_id.sql
@@ -0,0 +1,5 @@
+define mw_prefix='{$wgDBprefix}';
+
+ALTER TABLE &mw_prefix.change_tag ADD (
+ct_tag_id NUMBER NOT NULL,
+);
diff --git a/maintenance/oracle/archives/patch-tag_table.sql 
b/maintenance/oracle/archives/patch-tag_table.sql
new file mode 100644
index 0000000..28d7b2d
--- /dev/null
+++ b/maintenance/oracle/archives/patch-tag_table.sql
@@ -0,0 +1,11 @@
+define mw_prefix='{$wgDBprefix}';
+
+CREATE TABLE &mw_prefix.tag (
+  tag_id NUMBER NOT NULL,
+  tag_name VARCHAR2(255) NOT NULL,
+  tag_count NUMBER NOT NULL DEFAULT 0,
+  tag_timestamp TIMESTAMP(6) WITH TIME ZONE
+);
+
+ALTER TABLE &mw_prefix.tag ADD CONSTRAINT &mw_prefix.tag_pk PRIMARY KEY 
(tag_id);
+CREATE INDEX &mw_prefix.tag_i01 ON &mw_prefix.tag (tag_count);
diff --git a/maintenance/oracle/tables.sql b/maintenance/oracle/tables.sql
index 3bbcb02..42fe85c 100644
--- a/maintenance/oracle/tables.sql
+++ b/maintenance/oracle/tables.sql
@@ -739,12 +739,23 @@
 );
 ALTER TABLE &mw_prefix.updatelog ADD CONSTRAINT &mw_prefix.updatelog_pk 
PRIMARY KEY (ul_key);
 
+CREATE TABLE &mw_prefix.tag (
+  tag_id NUMBER NOT NULL,
+  tag_name VARCHAR2(255) NOT NULL,
+  tag_count NUMBER NOT NULL DEFAULT 0,
+  tag_timestamp TIMESTAMP(6) WITH TIME ZONE
+);
+
+ALTER TABLE &mw_prefix.tag ADD CONSTRAINT &mw_prefix.tag_pk PRIMARY KEY 
(tag_id);
+CREATE INDEX &mw_prefix.tag_i01 ON &mw_prefix.tag (tag_count);
+
 CREATE TABLE &mw_prefix.change_tag (
   ct_id NUMBER NOT NULL,
   ct_rc_id NUMBER NULL,
   ct_log_id NUMBER NULL,
   ct_rev_id NUMBER NULL,
   ct_tag VARCHAR2(255) NOT NULL,
+  ct_tag_id NUMBER NOT NULL,
   ct_params BLOB NULL
 );
 ALTER TABLE &mw_prefix.change_tag ADD CONSTRAINT &mw_prefix.change_tag_pk 
PRIMARY KEY (ct_id);
@@ -752,15 +763,6 @@
 CREATE UNIQUE INDEX &mw_prefix.change_tag_u02 ON &mw_prefix.change_tag 
(ct_log_id,ct_tag);
 CREATE UNIQUE INDEX &mw_prefix.change_tag_u03 ON &mw_prefix.change_tag 
(ct_rev_id,ct_tag);
 CREATE INDEX &mw_prefix.change_tag_i01 ON &mw_prefix.change_tag 
(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
-
-CREATE TABLE &mw_prefix.change_tag_statistics (
-  cts_tag VARCHAR2(255) NOT NULL,
-  cts_count NUMBER NOT NULL DEFAULT 0,
-  cts_timestamp TIMESTAMP(6) WITH TIME ZONE
-);
-
-ALTER TABLE &mw_prefix.change_tag_statistics ADD CONSTRAINT 
&mw_prefix.change_tag_statistics_pk PRIMARY KEY (cts_tag);
-CREATE INDEX &mw_prefix.change_tag_statistics_i01 ON 
&mw_prefix.change_tag_statistics (cts_count);
 
 CREATE TABLE &mw_prefix.tag_summary (
   ts_id NUMBER NOT NULL,
diff --git a/maintenance/populateTagTable.php b/maintenance/populateTagTable.php
new file mode 100644
index 0000000..57b5734
--- /dev/null
+++ b/maintenance/populateTagTable.php
@@ -0,0 +1,117 @@
+<?php
+/**
+ * Pouplates tag table
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that makes the required database updates for the tag 
table to be of any use.
+ *
+ * @ingroup Maintenance
+ */
+class PopulateTagTable extends LoggedUpdateMaintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->addDescription( 'Populates tag table' );
+       }
+
+       protected function getUpdateKey() {
+               return 'populate tag table';
+       }
+
+       protected function updateSkippedMessage() {
+               return 'tag table already populated.';
+       }
+
+       protected function doDBUpdates() {
+               global $wgChangeTagsSchemaMigrationStage;
+               $db = $this->getDB( DB_MASTER );
+               if ( $wgChangeTagsSchemaMigrationStage === MIGRATION_OLD ) {
+                       $this->error( "\$wgChangeTagsSchemaMigrationStage must 
not be set to MIGRATION_OLD\n" .
+                               "Set it to MIGRATION_WRITE_BOTH if this is the 
first time you're running this script." );
+                       return false;
+               }
+               if ( !$db->tableExists( 'tag' ) ) {
+                       $this->error( "tag table does not exist" );
+                       return false;
+               }
+               if ( !$db->fieldExists( 'change_tag', 'ct_tag_id' ) ) {
+                       $this->error( "change_tag table does not have ct_tag_id 
field" );
+                       return false;
+               }
+               $this->output( "Populating tag table\n" );
+
+               // fetch tags used on the wiki
+               $res = $db->select(
+                       'change_tag',
+                       'ct_tag',
+                       [],
+                       __METHOD__,
+                       [ 'DISTINCT' ]
+               );
+
+               $tags = 0;
+               $ctRows = 0;
+               foreach ( $res as $row ) {
+                       $tag = $row->ct_tag;
+                       $db->startAtomic( __METHOD__ );
+                       $hitcount = $db->selectRowCount(
+                               'change_tag',
+                               '*',
+                               [ 'ct_tag' => $tag ],
+                               __METHOD__
+                       );
+                       if ( $hitcount ) {
+                               $tags++;
+                               // Insert a row with name and hit count 
(timestamp is not computed)
+                               $row = [ 'tag_name' => $tag, 'tag_count' => 
$hitcount ];
+                               $db->insert( 'tag', $row, __METHOD__, [ 
'IGNORE' ] );
+                               if ( $db->affectedRows() > 0 ) {
+                                       $tagId = $db->insertId();
+                               } else {
+                                       // The row is already there. Replace 
it, then query its ID.
+                                       $db->replace( 'tag', [ 'tag_name' ], 
$row );
+                                       $tagId = $db->selectField( 'tag', 
'tag_id', [ 'tag_name' => $tag ] );
+                               }
+                       }
+                       $db->endAtomic( __METHOD__ );
+                       if ( $hitcount ) {
+                               // Update the ID in the change_tag rows
+                               $db->update(
+                                       'change_tag',
+                                       [ 'cts_tag_id' => $tagId ],
+                                       [ 'cts_tag' => $tag ]
+                               );
+                               $ctRows += $db->affectedRows();
+                       }
+               }
+
+               $this->output(
+                       "tag table population complete. {$tags} tags added, 
{$ctRows} change_tag rows updated\n"
+               );
+
+               return true;
+       }
+}
+
+$maintClass = "PopulateTagTable";
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/populateTagUsageStatistics.php 
b/maintenance/populateTagUsageStatistics.php
deleted file mode 100644
index dbdb2d9..0000000
--- a/maintenance/populateTagUsageStatistics.php
+++ /dev/null
@@ -1,99 +0,0 @@
-<?php
-/**
- * Pouplates change_tag_statistics table
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Maintenance
- */
-
-require_once __DIR__ . '/Maintenance.php';
-
-/**
- * Maintenance script that makes the required database updates for 
change_tag_statistics
- * table to be of any use.
- *
- * @ingroup Maintenance
- */
-class PopulateTagUsageStatistics extends LoggedUpdateMaintenance {
-       public function __construct() {
-               parent::__construct();
-               $this->addDescription( 'Populates change_tag_statistics' );
-       }
-
-       protected function getUpdateKey() {
-               return 'populate change_tag_statistics';
-       }
-
-       protected function updateSkippedMessage() {
-               return 'change_tag_statistics table already populated.';
-       }
-
-       protected function doDBUpdates() {
-               $db = $this->getDB( DB_MASTER );
-               if ( !$db->tableExists( 'change_tag_statistics' ) ) {
-                       $this->error( "change_tag_statistics table does not 
exist" );
-
-                       return false;
-               }
-               $this->output( "Populating change_tag_statistics table\n" );
-
-               // fetch tags used on the wiki
-               $res = $db->select(
-                       'change_tag',
-                       'ct_tag',
-                       [],
-                       __METHOD__,
-                       [ 'DISTINCT' ]
-               );
-
-               $count = 0;
-               foreach ( $res as $row ) {
-                       $tag = $row->ct_tag;
-                       $db->startAtomic( __METHOD__ );
-                       // get hitcount
-                       $hitcount = $db->selectRowCount(
-                               'change_tag',
-                               '*',
-                               [ 'ct_tag' => $tag ],
-                               __METHOD__
-                       );
-                       if ( $hitcount ) {
-                               $count++;
-                               // make sure we have a hitcount in case the tag 
was completely removed since line 56
-                               // record hitcount (replace in case a tag was 
inserted since clearing the table)
-                               // timestamp is not retrieved (too expansive in 
the middle of a sensitive operation)
-                               $db->replace(
-                                       'change_tag_statistics',
-                                       [ 'cts_tag' ],
-                                       [ 'cts_tag' => $tag, 'cts_count' => 
$hitcount ],
-                                       __METHOD__
-                               );
-                       }
-                       $db->endAtomic( __METHOD__ );
-               }
-
-               $this->output(
-                       "change_tag_statistics population complete ... 
hitcounts added for {$count} tags\n"
-               );
-
-               return true;
-       }
-}
-
-$maintClass = "PopulateTagUsageStatistics";
-require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/postgres/archives/patch-change_tag_statistics.sql 
b/maintenance/postgres/archives/patch-change_tag_statistics.sql
deleted file mode 100644
index b5176cf..0000000
--- a/maintenance/postgres/archives/patch-change_tag_statistics.sql
+++ /dev/null
@@ -1,10 +0,0 @@
---
--- This table contains change tags hitcounts extracted from the change_tag 
table.
---
-CREATE TABLE change_tag_statistics (
-  cts_tag TEXT NOT NULL PRIMARY KEY,
-  cts_count INTEGER NOT NULL DEFAULT 0,
-  cts_timestamp TIMESTAMPTZ
-);
-
-CREATE INDEX change_tag_statistics_count ON change_tag_statistics(cts_count);
diff --git a/maintenance/postgres/archives/patch-ct_tag_id.sql 
b/maintenance/postgres/archives/patch-ct_tag_id.sql
new file mode 100644
index 0000000..0156a40
--- /dev/null
+++ b/maintenance/postgres/archives/patch-ct_tag_id.sql
@@ -0,0 +1,2 @@
+ALTER TABLE change_tags
+    ADD COLUMN ct_tag_id INTEGER NOT NULL DEFAULT 0;
diff --git a/maintenance/postgres/archives/patch-tag_table.sql 
b/maintenance/postgres/archives/patch-tag_table.sql
new file mode 100644
index 0000000..6d27feb
--- /dev/null
+++ b/maintenance/postgres/archives/patch-tag_table.sql
@@ -0,0 +1,9 @@
+CREATE SEQUENCE tag_tag_id_seq;
+CREATE TABLE tag (
+  tag_id        INTEGER  NOT NULL PRIMARY KEY DEFAULT 
nextval('tag_tag_id_seq'),
+  tag_name      TEXT     NOT NULL,
+  tag_count     INTEGER  NOT NULL DEFAULT 0,
+  tag_timestamp TIMESTAMPTZ  NULL
+);
+CREATE UNIQUE INDEX tag_name_idx ON tag(tag_name);
+CREATE INDEX tag_count_idx ON tag(tag_count);
diff --git a/maintenance/postgres/tables.sql b/maintenance/postgres/tables.sql
index 2e3e811..8a3fb7f 100644
--- a/maintenance/postgres/tables.sql
+++ b/maintenance/postgres/tables.sql
@@ -26,6 +26,7 @@
 DROP SEQUENCE IF EXISTS archive_ar_id_seq CASCADE;
 DROP SEQUENCE IF EXISTS externallinks_el_id_seq CASCADE;
 DROP SEQUENCE IF EXISTS sites_site_id_seq CASCADE;
+DROP SEQUENCE IF EXISTS tag_tag_id_seq CASCADE;
 DROP SEQUENCE IF EXISTS change_tag_ct_id_seq CASCADE;
 DROP SEQUENCE IF EXISTS tag_summary_ts_id_seq CASCADE;
 DROP FUNCTION IF EXISTS page_deleted() CASCADE;
@@ -704,6 +705,16 @@
 CREATE UNIQUE INDEX category_title ON category(cat_title);
 CREATE INDEX category_pages ON category(cat_pages);
 
+CREATE SEQUENCE tag_tag_id_seq;
+CREATE TABLE tag (
+  tag_id        INTEGER  NOT NULL PRIMARY KEY DEFAULT 
nextval('tag_tag_id_seq'),
+  tag_name      TEXT     NOT NULL,
+  tag_count     INTEGER  NOT NULL DEFAULT 0,
+  tag_timestamp TIMESTAMPTZ  NULL
+);
+CREATE UNIQUE INDEX tag_name_idx ON tag(tag_name);
+CREATE INDEX tag_count_idx ON tag(tag_count);
+
 CREATE SEQUENCE change_tag_ct_id_seq;
 CREATE TABLE change_tag (
   ct_id      INTEGER  NOT NULL  PRIMARY KEY DEFAULT 
nextval('change_tag_ct_id_seq'),
@@ -711,20 +722,13 @@
   ct_log_id  INTEGER      NULL,
   ct_rev_id  INTEGER      NULL,
   ct_tag     TEXT     NOT NULL,
+  ct_tag_id  INTEGER  NOT NULL DEFAULT 0,
   ct_params  TEXT         NULL
 );
 CREATE UNIQUE INDEX change_tag_rc_tag ON change_tag(ct_rc_id,ct_tag);
 CREATE UNIQUE INDEX change_tag_log_tag ON change_tag(ct_log_id,ct_tag);
 CREATE UNIQUE INDEX change_tag_rev_tag ON change_tag(ct_rev_id,ct_tag);
 CREATE INDEX change_tag_tag_id ON 
change_tag(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
-
-CREATE TABLE change_tag_statistics (
-  cts_tag TEXT NOT NULL PRIMARY KEY,
-  cts_count INTEGER NOT NULL DEFAULT 0,
-  cts_timestamp TIMESTAMPTZ
-);
-
-CREATE INDEX change_tag_statistics_count ON change_tag_statistics(cts_count);
 
 CREATE SEQUENCE tag_summary_ts_id_seq;
 CREATE TABLE tag_summary (
diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index 8945c3b..06cfc26 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -1685,8 +1685,18 @@
   ul_value blob
 ) /*$wgDBTableOptions*/;
 
+-- Table defining tag names for IDs. Also stores hit counts to avoid expensive 
queries on change_tag
+CREATE TABLE /*_*/tag (
+    tag_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
+    tag_name varchar(255) NOT NULL,
+    tag_count bigint unsigned NOT NULL default 0,
+    tag_timestamp varbinary(14) NULL
+) /*$wgDBTableOptions*/;
 
--- A table to track tags for revisions, logs and recent changes.
+CREATE UNIQUE INDEX /*i*/tag_name ON /*_*/tag (tag_name);
+CREATE INDEX /*i*/tag_count ON /*_*/tag (tag_count);
+
+-- A table to track which revisions, logs and recent changes are tagged with 
what tag.
 CREATE TABLE /*_*/change_tag (
   ct_id int unsigned NOT NULL PRIMARY KEY AUTO_INCREMENT,
   -- RCID for the change
@@ -1695,8 +1705,10 @@
   ct_log_id int unsigned NULL,
   -- REVID for the change
   ct_rev_id int unsigned NULL,
-  -- Tag applied
+  -- Tag name (being migrated to ct_tag_id)
   ct_tag varchar(255) NOT NULL,
+  -- Tag ID (foreign key to tag.tag_id)
+  ct_tag_id int unsigned NOT NULL,
   -- Parameters for the tag, presently unused
   ct_params blob NULL
 ) /*$wgDBTableOptions*/;
@@ -1706,17 +1718,6 @@
 CREATE UNIQUE INDEX /*i*/change_tag_rev_tag ON /*_*/change_tag 
(ct_rev_id,ct_tag);
 -- Covering index, so we can pull all the info only out of the index.
 CREATE INDEX /*i*/change_tag_tag_id ON /*_*/change_tag 
(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
-
--- Table giving hit counts and timestamp of last addition for tags,
--- to avoid expansive queries on change_tag.
-CREATE TABLE /*_*/change_tag_statistics (
-  cts_tag varchar(255) NOT NULL PRIMARY KEY,
-  cts_count bigint unsigned NOT NULL default 0,
-  cts_timestamp varbinary(14) NULL
-) /*$wgDBTableOptions*/;
-
--- Index so we can sort by count
-CREATE INDEX /*i*/change_tag_statistics_count ON /*_*/change_tag_statistics 
(cts_count);
 
 -- Rollup table to pull a LIST of tags simply without ugly GROUP_CONCAT
 -- that only works on MySQL 4.1+

-- 
To view, visit https://gerrit.wikimedia.org/r/405375
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ie3dfa28e40f16061466b7d5954fedda63764b66f
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Catrope <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to