BryanDavis has uploaded a new change for review. https://gerrit.wikimedia.org/r/86883
Change subject: New maintenance script to purge edited pages. ...................................................................... New maintenance script to purge edited pages. This maintenance script can be used to recover from large HTCP listener outages and/or network partitions. It searches the database for page revisions occurring within a specified time period, converts them to full URLs, and sends squid/varnish purge messages using SquidUpdate. A command-line option allows specifying a host:port destination for HTCP purge datagrams, which can be used to isolate the purge announcement to a particular varnish server or data center. Bug: 54647 Change-Id: Id0e95290e83333545e6bb0378cc620c35c653bf9 --- A maintenance/purgeEditedFiles.php 1 file changed, 138 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core refs/changes/83/86883/1 diff --git a/maintenance/purgeEditedFiles.php b/maintenance/purgeEditedFiles.php new file mode 100644 index 0000000..1ec6bff --- /dev/null +++ b/maintenance/purgeEditedFiles.php @@ -0,0 +1,138 @@ +<?php +/** + * Send purge requests for pages edited in date range to squid/varnish. + * + * @section LICENSE + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that sends purge requests for pages edited in a date
+ * range to squid/varnish.
+ *
+ * Can be used to recover from an HTCP message partition or other major cache
+ * layer interruption.
+ *
+ * Only pages whose latest revision falls inside the requested time range are
+ * selected. Their internal URLs are handed to SquidUpdate one batch at a time.
+ *
+ * @ingroup Maintenance
+ */
+class PurgeEditedFiles extends Maintenance {
+
+	/** Port used when --htcp-dest names a host without a port */
+	const DEFAULT_HTCP_PORT = 4827;
+
+	/**
+	 * Register the command line options and the default batch size.
+	 */
+	public function __construct() {
+		parent::__construct();
+		$this->mDescription = 'Send purge requests for edits in date range to squid/varnish';
+		$this->addOption( 'starttime', 'Starting timestamp', true, true );
+		$this->addOption( 'endtime', 'Ending timestamp', true, true );
+		$this->addOption( 'htcp-dest', 'HTCP announcement destination (host:port)', false, true );
+		$this->addOption( 'dryrun', 'Do not send purge requests' );
+		$this->addOption( 'verbose', 'Show more output', false, false, 'v' );
+		$this->setBatchSize( 100 );
+	}
+
+	/**
+	 * Validate the options, then walk the matching pages in page_id order and
+	 * purge their URLs one batch at a time.
+	 *
+	 * Invalid options abort the run with a non-zero exit status instead of
+	 * silently purging nothing.
+	 */
+	public function execute() {
+		global $wgHTCPRouting;
+
+		if ( $this->hasOption( 'htcp-dest' ) ) {
+			$dest = $this->parseHtcpDest( $this->getOption( 'htcp-dest' ) );
+			if ( $dest === false ) {
+				$this->error( 'Invalid --htcp-dest value; expected "host" or "host:port"', 1 );
+			}
+
+			// route all HTCP messages to provided host:port
+			$wgHTCPRouting = array(
+				'' => $dest,
+			);
+			if ( $this->hasOption( 'verbose' ) ) {
+				$this->output( "HTCP broadcasts to {$dest['host']}:{$dest['port']}\n" );
+			}
+		}
+
+		// Normalize both bounds up front. wfTimestamp() is expected to return
+		// false for input it cannot parse; without this check a typo would
+		// produce an empty result set and a misleading "Done!".
+		$start = wfTimestamp( TS_MW, $this->getOption( 'starttime' ) );
+		$end = wfTimestamp( TS_MW, $this->getOption( 'endtime' ) );
+		if ( $start === false || $end === false ) {
+			$this->error( 'Could not parse --starttime and/or --endtime as a timestamp', 1 );
+		}
+		// TS_MW values are fixed-width digit strings, so byte order is time order.
+		if ( strcmp( $start, $end ) > 0 ) {
+			$this->error( '--starttime must not be later than --endtime', 1 );
+		}
+
+		// Only the core page/revision tables are read, so use the wiki's own
+		// slave connection rather than going through the file repository.
+		$dbr = wfGetDB( DB_SLAVE );
+
+		$minUpdate = $dbr->addQuotes( $dbr->timestamp( $start ) );
+		$maxUpdate = $dbr->addQuotes( $dbr->timestamp( $end ) );
+		// Pagination cursor: highest page_id handled so far.
+		$lastId = -1;
+
+		while ( true ) {
+			// find next N pages that were changed in the timerange
+			$res = $dbr->select(
+				array( 'page', 'revision' ),
+				array( 'page_id', 'page_namespace', 'page_title' ),
+				array(
+					'page_id = rev_page',
+					// only the current revision of each page is considered
+					'rev_id = page_latest',
+					'rev_timestamp >= ' . $minUpdate,
+					'rev_timestamp <= ' . $maxUpdate,
+					'page_id > ' . intval( $lastId ),
+				),
+				__METHOD__,
+				array(
+					'ORDER BY' => 'page_id',
+					'LIMIT' => $this->mBatchSize,
+				)
+			);
+
+			if ( !$res->numRows() ) {
+				// nothing more found so we are done
+				break;
+			}
+
+			// create list of URLs from page_namespace + page_title
+			$urls = array();
+			foreach ( $res as $row ) {
+				$title = Title::makeTitle( $row->page_namespace, $row->page_title );
+				$urls[] = $title->getInternalURL();
+				// keep track of page_id for next query
+				$lastId = intval( $row->page_id );
+			}
+
+			// send batch of purge requests out to squids
+			$this->sendPurgeRequest( $urls );
+		}
+
+		$this->output( "Done!\n" );
+	}
+
+	/**
+	 * Split a --htcp-dest value into host and port.
+	 *
+	 * A missing port falls back to DEFAULT_HTCP_PORT. IPv6 literals are not
+	 * supported because the value is split on the first ':'.
+	 *
+	 * @param string $dest Value in the form "host" or "host:port"
+	 * @return array|bool array( 'host' => string, 'port' => int ), or false
+	 *   when the host is empty or the port is not an integer in 1..65535
+	 */
+	private function parseHtcpDest( $dest ) {
+		$parts = explode( ':', $dest, 2 );
+		$host = $parts[0];
+		$port = isset( $parts[1] ) ? $parts[1] : (string)self::DEFAULT_HTCP_PORT;
+
+		// ctype_digit() also rejects the empty string, e.g. for "host:"
+		if ( $host === '' || !ctype_digit( $port ) ) {
+			return false;
+		}
+
+		$port = intval( $port );
+		if ( $port < 1 || $port > 65535 ) {
+			return false;
+		}
+
+		return array( 'host' => $host, 'port' => $port );
+	}
+
+	/**
+	 * Helper to purge an array of $urls
+	 *
+	 * With --dryrun or --verbose the URLs are printed, one per line. With
+	 * --dryrun nothing is sent.
+	 *
+	 * @param array $urls List of URLs to purge from squids
+	 */
+	private function sendPurgeRequest( $urls ) {
+		if ( !$urls ) {
+			// nothing to print or purge
+			return;
+		}
+
+		$dryRun = $this->hasOption( 'dryrun' );
+		if ( $dryRun || $this->hasOption( 'verbose' ) ) {
+			$this->output( implode( "\n", $urls ) . "\n" );
+		}
+
+		if ( $dryRun ) {
+			return;
+		}
+
+		$u = new SquidUpdate( $urls );
+		$u->doUpdate();
+	}
+
+}
+
+$maintClass = "PurgeEditedFiles";
+require_once RUN_MAINTENANCE_IF_MAIN;
--
To view, visit https://gerrit.wikimedia.org/r/86883
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Id0e95290e83333545e6bb0378cc620c35c653bf9
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: BryanDavis <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
