Cenarium has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/334337 )
Change subject: Add change_tag_statistics table to speed up tag hitcount
retrieval
......................................................................
Add change_tag_statistics table to speed up tag hitcount retrieval
This adds a change_tag_statistics table giving the hitcount
of each tag used on the wiki. This avoids querying change_tag
directly for hitcounts, which is extremely expansive.
The table is used only if requested in config, and requires a
maintenance script to be populated.
Bug: T91535
Change-Id: Ib20a2bdd2f7156f153271f8aeee433f9a7e26f49
---
M includes/DefaultSettings.php
M includes/changetags/ChangeTags.php
A maintenance/archives/patch_change_tag_statistics.sql
A maintenance/populateTagUsageStatistics.php
M maintenance/tables.sql
5 files changed, 154 insertions(+), 10 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/37/334337/1
diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php
index 086b615..a9f7e53 100644
--- a/includes/DefaultSettings.php
+++ b/includes/DefaultSettings.php
@@ -6819,6 +6819,13 @@
$wgUseTagFilter = true;
/**
+ * Uses the change_tag_statistics table instead of change_tag to retrieve
+ * tag hitcounts, requires populateTagUsageStatistics to have been run,
+ * which requires patch_change_tag_statistics applied
+ */
+$wgUseChangeTagStatisticsTable = false;
+
+/**
* If set to an integer, pages that are watched by this many users or more
* will not require the unwatchedpages permission to view the number of
* watchers.
diff --git a/includes/changetags/ChangeTags.php
b/includes/changetags/ChangeTags.php
index d2239eb..3e4ed99 100644
--- a/includes/changetags/ChangeTags.php
+++ b/includes/changetags/ChangeTags.php
@@ -172,6 +172,7 @@
&$rev_id = null, &$log_id = null, $params = null, RecentChange
$rc = null,
User $user = null
) {
+ global $wgUseChangeTagStatisticsTable;
$tagsToAdd = array_filter( (array)$tagsToAdd ); // Make sure
we're submitting all tags...
$tagsToRemove = array_filter( (array)$tagsToRemove );
@@ -272,6 +273,14 @@
}
$dbw->insert( 'change_tag', $tagsRows, __METHOD__, [
'IGNORE' ] );
+ if ( $wgUseChangeTagStatisticsTable ) {
+ $dbw->update(
+ 'change_tag_statistics',
+ [ 'cts_count = cts_count + 1' ],
+ [ 'cts_tag' => $tagsToAdd ],
+ __METHOD__
+ );
+ }
}
// delete from change_tag
@@ -286,6 +295,14 @@
]
);
$dbw->delete( 'change_tag', $conds, __METHOD__
);
+ }
+ if ( $wgUseChangeTagStatisticsTable ) {
+ $dbw->update(
+ 'change_tag_statistics',
+ [ 'cts_count = cts_count - 1' ],
+ [ 'cts_tag' => $tagsToRemove ],
+ __METHOD__
+ );
}
}
@@ -1018,6 +1035,7 @@
* @since 1.25
*/
public static function deleteTagEverywhere( $tag ) {
+ global $wgUseChangeTagStatisticsTable;
$dbw = wfGetDB( DB_MASTER );
$dbw->startAtomic( __METHOD__ );
@@ -1039,6 +1057,10 @@
// delete from change_tag
$dbw->delete( 'change_tag', [ 'ct_tag' => $tag ], __METHOD__ );
+
+ if ( $wgUseChangeTagStatisticsTable ) {
+ $dbw->delete( 'change_tag_statistics', [ 'cts_tag' =>
$tag ], __METHOD__ );
+ }
$dbw->endAtomic( __METHOD__ );
@@ -1314,21 +1336,39 @@
wfMemcKey( 'change-tag-statistics' ),
WANObjectCache::TTL_MINUTE * 5,
function ( $oldValue, &$ttl, array &$setOpts ) use (
$fname ) {
+ global $wgUseChangeTagStatisticsTable;
$dbr = wfGetDB( DB_REPLICA, 'vslow' );
$setOpts += Database::getCacheSetOptions( $dbr
);
- $res = $dbr->select(
- 'change_tag',
- [ 'ct_tag', 'hitcount' => 'count(*)' ],
- [],
- $fname,
- [ 'GROUP BY' => 'ct_tag', 'ORDER BY' =>
'hitcount DESC' ]
- );
+ if ( $wgUseChangeTagStatisticsTable ) {
+ $res = $dbr->select(
+ 'change_tag_statistics',
+ [ 'cts_tag', 'cts_count' ],
+ [],
+ $fname,
+ [ 'ORDER BY' => 'cts_count
DESC' ]
+ );
- $out = [];
- foreach ( $res as $row ) {
- $out[$row->ct_tag] = $row->hitcount;
+ $out = [];
+ foreach ( $res as $row ) {
+ if ( $row->cts_count ) {
+ $out[$row->cts_tag] =
$row->cts_count;
+ }
+ }
+ } else {
+ $res = $dbr->select(
+ 'change_tag',
+ [ 'ct_tag', 'hitcount' =>
'count(*)' ],
+ [],
+ $fname,
+ [ 'GROUP BY' => 'ct_tag',
'ORDER BY' => 'hitcount DESC' ]
+ );
+
+ $out = [];
+ foreach ( $res as $row ) {
+ $out[$row->ct_tag] =
$row->hitcount;
+ }
}
return $out;
diff --git a/maintenance/archives/patch_change_tag_statistics.sql
b/maintenance/archives/patch_change_tag_statistics.sql
new file mode 100644
index 0000000..995b12d
--- /dev/null
+++ b/maintenance/archives/patch_change_tag_statistics.sql
@@ -0,0 +1,7 @@
+--
+-- This table contains change tags hitcounts extracted from the change_tag
table.
+--
+CREATE TABLE /*_*/change_tag_statistics (
+ cts_tag varchar(255) NOT NULL PRIMARY KEY,
+ cts_count bigint unsigned default 0
+) /*$wgDBTableOptions*/;
diff --git a/maintenance/populateTagUsageStatistics.php
b/maintenance/populateTagUsageStatistics.php
new file mode 100644
index 0000000..f18626a
--- /dev/null
+++ b/maintenance/populateTagUsageStatistics.php
@@ -0,0 +1,84 @@
+<?php
+/**
+ * Pouplates change_tag_statistics table
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that makes the required database updates for
change_tag_statistics
+ * table to be of any use.
+ *
+ * @ingroup Maintenance
+ */
+class PopulateTagUsageStatistics extends LoggedUpdateMaintenance {
+ public function __construct() {
+ parent::__construct();
+ $this->addDescription( 'Populates change_tag_statistics' );
+ }
+
+ protected function getUpdateKey() {
+ return 'populate change_tag_statistics';
+ }
+
+ protected function updateSkippedMessage() {
+ return 'change_tag_statistics table already populated.';
+ }
+
+ protected function doDBUpdates() {
+ $db = $this->getDB( DB_MASTER );
+ if ( !$db->tableExists( 'change_tag_statistics' ) ) {
+ $this->error( "change_tag_statistics table does not
exist" );
+
+ return false;
+ }
+ $this->output( "Populating change_tag_statistics table\n" );
+
+ // clear table
+ $db->delete( 'change_tag_statistics', [ '1=1' ], __METHOD__ );
+
+ // get hitcounts
+ $res = $db->select(
+ 'change_tag',
+ [ 'ct_tag', 'hitcount' => 'count(*)' ],
+ [],
+ __METHOD__,
+ [ 'GROUP BY' => 'ct_tag', 'ORDER BY' => 'hitcount DESC'
]
+ );
+
+ $count = 0;
+ $rows = [];
+ foreach ( $res as $row ) {
+ $count++;
+ $rows[] = [ 'cts_tag' => $row->ct_tag, 'cts_count' =>
$row->hitcount ];
+ }
+
+ // record hitcounts
+ $db->insert( 'change_tag_statistics', $rows, __METHOD__, [] );
+
+ $this->output( "change_tag_statistics population complete ...
hitcounts added for {$count} tags\n" );
+
+ return true;
+ }
+}
+
+$maintClass = "PopulateTagUsageStatistics";
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/tables.sql b/maintenance/tables.sql
index 2b6ea03..04e8e43 100644
--- a/maintenance/tables.sql
+++ b/maintenance/tables.sql
@@ -1498,6 +1498,12 @@
-- Covering index, so we can pull all the info only out of the index.
CREATE INDEX /*i*/change_tag_tag_id ON /*_*/change_tag
(ct_tag,ct_rc_id,ct_rev_id,ct_log_id);
+-- Table giving hit counts for tags, to avoid expansive queries on change_tag.
+CREATE TABLE /*_*/change_tag_statistics (
+ cts_tag varchar(255) NOT NULL PRIMARY KEY,
+ cts_count bigint unsigned default 0
+) /*$wgDBTableOptions*/;
+
-- Rollup table to pull a LIST of tags simply without ugly GROUP_CONCAT
-- that only works on MySQL 4.1+
--
To view, visit https://gerrit.wikimedia.org/r/334337
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ib20a2bdd2f7156f153271f8aeee433f9a7e26f49
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Cenarium <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits