BryanDavis has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/87635


Change subject: [WIP] Add script to purge changed files from cache
......................................................................

[WIP] Add script to purge changed files from cache

This maintenance script enhances the prior purgeDeletedFiles script to
optionally also process create and modify events that may have left
stale information in front-end caches.

Changes from purgeDeletedFiles:
* `--starttime` & `--endtime` are now required
* `--type` allows selecting 'created' and/or 'modified' events in
    addition to 'deleted' events.
* `--dry-run` to list affected files but perform no purges
* `--htcp-dest` option to restrict HTCP broadcast messages

Change-Id: I7181ea461ba9758747bff4fd70530d00dab492cc
---
A maintenance/purgeChangedFiles.php
D maintenance/purgeDeletedFiles.php
2 files changed, 252 insertions(+), 125 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core 
refs/changes/35/87635/1

diff --git a/maintenance/purgeChangedFiles.php 
b/maintenance/purgeChangedFiles.php
new file mode 100644
index 0000000..78851cd
--- /dev/null
+++ b/maintenance/purgeChangedFiles.php
@@ -0,0 +1,252 @@
+<?php
+/**
+ * Scan the logging table and purge affected files within a timeframe.
+ *
+ * @section LICENSE
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that scans the logging table and purges affected files
+ * within a timeframe.
+ *
+ * @ingroup Maintenance
+ */
+class PurgeChangedFiles extends Maintenance {
+       /**
+        * Mapping from type option to log type and actions.
+        * @var array
+        */
+       private static $typeMappings = array(
+               'created' => array(
+                       'upload' => array( 'upload' ),
+                       'import' => array( 'upload', 'interwiki' ),
+               ),
+               'deleted' => array(
+                       'delete' => array( 'delete', 'revision' ),
+                       'suppress' => array( 'delete', 'revision' ),
+               ),
+               'modified' => array(
+                       'upload' => array( 'overwrite', 'revert' ),
+                       'move' => array( 'move', 'move_redir' ),
+               ),
+       );
+
+       /**
+        * @var string
+        */
+       private $startTimestamp;
+
+       /**
+        * @var string
+        */
+       private $endTimestamp;
+
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Scan the logging table and purge files 
and thumbnails.";
+               $this->addOption( 'starttime', 'Starting timestamp', true, true 
);
+               $this->addOption( 'endtime', 'Ending timestamp', true, true );
+               $this->addOption( 'type', 'Types of changes to send purges for 
(' .
+                       implode( ',', array_keys( self::$typeMappings ) ) . 
',all)', false, true );
+               $this->addOption( 'htcp-dest', 'HTCP announcement destination 
(IP:port)', false, true );
+               $this->addOption( 'dry-run', 'Do not send purge requests' );
+               $this->addOption( 'verbose', 'Show more output', false, false, 
'v' );
+               $this->setBatchSize( 100 );
+       }
+
+       public function execute() {
+               global $wgHTCPRouting;
+
+               if ( $this->hasOption( 'htcp-dest' ) ) {
+                       $parts = explode( ':', $this->getOption( 'htcp-dest' ) 
);
+                       if ( count( $parts ) < 2 ) {
+                               // Add default htcp port
+                               $parts[] = '4827';
+                       }
+
+                       // Route all HTCP messages to provided host:port
+                       $wgHTCPRouting = array(
+                               '' => array( 'host' => $parts[0], 'port' => 
$parts[1] ),
+                       );
+                       $this->verbose( "HTCP broadcasts to 
{$parts[0]}:{$parts[1]}\n" );
+               }
+
+               // Find out which actions we should be concerned with
+               $typeOpt = $this->getOption( 'type', 'all' );
+               $validTypes = array_keys( self::$typeMappings );
+               if ( $typeOpt === 'all' ) {
+                       // Convert 'all' to all registered types
+                       $typeOpt = implode( ',', $validTypes );
+               }
+               $typeList = explode( ',', $typeOpt );
+               foreach ( $typeList as $type ) {
+                       if ( !in_array( $type, $validTypes ) ) {
+                               $this->error( "\nERROR: Unknown type: 
{$type}\n" );
+                               $this->maybeHelp( true );
+                       }
+               }
+
+               // Validate the timestamps
+               $dbr = $this->getDB( DB_SLAVE );
+               $this->startTimestamp = $dbr->timestamp( $this->getOption( 
'starttime' ) );
+               $this->endTimestamp = $dbr->timestamp( $this->getOption( 
'endtime' ) );
+
+               if ( $this->startTimestamp > $this->endTimestamp ) {
+                       $this->error( "\nERROR: starttime after endtime\n" );
+                       $this->maybeHelp( true );
+               }
+
+               // Turn on verbose when dry-run is enabled
+               if ( $this->hasOption( 'dry-run' ) ) {
+                       $this->mOptions['verbose'] = 1;
+               }
+
+               $this->verbose( 'Purging files that were: ' . implode( ', ', 
$typeList ) . "\n");
+               foreach ( $typeList as $type ) {
+                       $this->verbose( "Checking for {$type} files...\n" );
+                       $this->purgeFromLogType( $type );
+                       if ( !$this->hasOption( 'dry-run' ) ) {
+                               $this->verbose( "...{$type} files purged.\n\n" 
);
+                       }
+               }
+       }
+
+       /**
+        * Purge cache and thumbnails for changes of the given type.
+        *
+        * @param string $type Type of change to find
+        */
+       protected function purgeFromLogType( $type ) {
+               $repo = RepoGroup::singleton()->getLocalRepo();
+               $dbr = $repo->getSlaveDB();
+
+               foreach ( self::$typeMappings[$type] as $logType => $logActions 
) {
+                       $this->verbose( "Scanning for {$logType}/" . implode( 
',', $logActions ) . "\n" );
+                       $conds = array(
+                               'log_namespace' => NS_FILE,
+                               'log_type' => $logType,
+                               'log_action' => $logActions,
+                               'log_timestamp >= ' . $dbr->addQuotes( 
$this->startTimestamp ),
+                               'log_timestamp <= ' . $dbr->addQuotes( 
$this->endTimestamp ),
+                       );
+
+                       $res = $dbr->select(
+                               'logging',
+                               array( 'log_title', 'log_timestamp' ),
+                               $conds,
+                               __METHOD__
+                       );
+
+                       foreach ( $res as $row ) {
+                               $file = $repo->newFile( Title::makeTitle( 
NS_FILE, $row->log_title ) );
+
+                               if ( $this->hasOption( 'dry-run' ) ) {
+                                       $this->verbose( 
"{$type}[{$row->log_timestamp}]: {$row->log_title}\n" );
+                                       continue;
+                               }
+
+                               // Purge current version and any versions in 
oldimage table
+                               $file->purgeCache();
+                               $file->purgeHistory();
+
+                               if ( $logType === 'delete' ) {
+                                       // If there is an orphaned storage 
file... delete it
+                                       if ( !$file->exists() && 
$repo->fileExists( $file->getPath() ) ) {
+                                               $dpath = $this->getDeletedPath( 
$repo, $file );
+                                               if ( $repo->fileExists( $dpath 
) ) {
+                                                       // Sanity check to 
avoid data loss
+                                                       
$repo->getBackend()->delete( array( 'src' => $file->getPath() ) );
+                                                       $this->verbose( 
"Deleted orphan file: {$file->getPath()}.\n" );
+                                               } else {
+                                                       $this->error( "File was 
not deleted: {$file->getPath()}.\n" );
+                                               }
+                                       }
+
+                                       // Purge items from filearchive table 
(rows are likely here)
+                                       $this->purgeFromArchiveTable( $repo, 
$file );
+
+                               } else if ( $logType === 'move' ) {
+                                       // FIXME bd808
+                                       /*
+                                        * 17:54 <AaronSchulz> not sure how to 
handle the page move
+                                        *         destination though
+                                        * 17:55 <AaronSchulz> I guess you can 
look at move log entries,
+                                        *         pull out the params and grab 
the dest
+                                        * 17:55 <AaronSchulz> doable, a bit 
code though
+                                        * 17:55 <AaronSchulz> not too much 
though
+                                        *
+                                        */
+                               }
+
+                               $this->verbose(
+                                       "Purged file {$row->log_title}; {$type} 
on {$row->log_timestamp}.\n" );
+                       }
+               }
+       }
+
+       protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile 
$file ) {
+               $dbr = $repo->getSlaveDB();
+               $res = $dbr->select( 'filearchive',
+                       array( 'fa_archive_name' ),
+                       array( 'fa_name' => $file->getName() ),
+                       __METHOD__
+               );
+               foreach ( $res as $row ) {
+                       if ( $row->fa_archive_name === null ) {
+                               continue; // was not an old version (current 
version names checked already)
+                       }
+                       $ofile = $repo->newFromArchiveName( $file->getTitle(), 
$row->fa_archive_name );
+                       // If there is an orphaned storage file still 
there...delete it
+                       if ( !$file->exists() && $repo->fileExists( 
$ofile->getPath() ) ) {
+                               $dpath = $this->getDeletedPath( $repo, $ofile );
+                               if ( $repo->fileExists( $dpath ) ) { // sanity 
check to avoid data loss
+                                       $repo->getBackend()->delete( array( 
'src' => $ofile->getPath() ) );
+                                       $this->output( "Deleted orphan file: 
{$ofile->getPath()}.\n" );
+                               } else {
+                                       $this->error( "File was not deleted: 
{$ofile->getPath()}.\n" );
+                               }
+                       }
+                       $file->purgeOldThumbnails( $row->fa_archive_name );
+               }
+       }
+
+       protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
+               $hash = $repo->getFileSha1( $file->getPath() );
+               $key = "{$hash}.{$file->getExtension()}";
+               return $repo->getDeletedHashPath( $key ) . $key;
+       }
+
+       /**
+        * Send an output message iff the 'verbose' option has been provided.
+        *
+        * @param string $msg Message to output
+        */
+       protected function verbose( $msg ) {
+               if ( $this->hasOption( 'verbose' ) ) {
+                       $this->output( $msg );
+               }
+       }
+
+}
+
+$maintClass = "PurgeChangedFiles";
+require_once RUN_MAINTENANCE_IF_MAIN;
diff --git a/maintenance/purgeDeletedFiles.php 
b/maintenance/purgeDeletedFiles.php
deleted file mode 100644
index 9f2af33..0000000
--- a/maintenance/purgeDeletedFiles.php
+++ /dev/null
@@ -1,125 +0,0 @@
-<?php
-/**
- * Scan the deletion log and purges affected files within a timeframe.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Maintenance
- */
-
-require_once __DIR__ . '/Maintenance.php';
-
-/**
- * Maintenance script that scans the deletion log and purges affected files
- * within a timeframe.
- *
- * @ingroup Maintenance
- */
-class PurgeDeletedFiles extends Maintenance {
-       public function __construct() {
-               parent::__construct();
-               $this->mDescription = "Scan the logging table and purge files 
that where deleted.";
-               $this->addOption( 'starttime', 'Starting timestamp', false, 
true );
-               $this->addOption( 'endtime', 'Ending timestamp', false, true );
-       }
-
-       public function execute() {
-               $this->output( "Purging cache and thumbnails for deleted 
files...\n" );
-               $this->purgeFromLogType( 'delete' );
-               $this->output( "...deleted files purged.\n\n" );
-
-               $this->output( "Purging cache and thumbnails for suppressed 
files...\n" );
-               $this->purgeFromLogType( 'suppress' );
-               $this->output( "...suppressed files purged.\n" );
-       }
-
-       protected function purgeFromLogType( $logType ) {
-               $repo = RepoGroup::singleton()->getLocalRepo();
-               $db = $repo->getSlaveDB();
-
-               $conds = array(
-                       'log_namespace' => NS_FILE,
-                       'log_type' => $logType,
-                       'log_action' => array( 'delete', 'revision' )
-               );
-               $start = $this->getOption( 'starttime' );
-               if ( $start ) {
-                       $conds[] = 'log_timestamp >= ' . $db->addQuotes( 
$db->timestamp( $start ) );
-               }
-               $end = $this->getOption( 'endtime' );
-               if ( $end ) {
-                       $conds[] = 'log_timestamp <= ' . $db->addQuotes( 
$db->timestamp( $end ) );
-               }
-
-               $res = $db->select( 'logging', array( 'log_title', 
'log_timestamp' ), $conds, __METHOD__ );
-               foreach ( $res as $row ) {
-                       $file = $repo->newFile( Title::makeTitle( NS_FILE, 
$row->log_title ) );
-                       // If there is an orphaned storage file still 
there...delete it
-                       if ( !$file->exists() && $repo->fileExists( 
$file->getPath() ) ) {
-                               $dpath = $this->getDeletedPath( $repo, $file );
-                               if ( $repo->fileExists( $dpath ) ) { // sanity 
check to avoid data loss
-                                       $repo->getBackend()->delete( array( 
'src' => $file->getPath() ) );
-                                       $this->output( "Deleted orphan file: 
{$file->getPath()}.\n" );
-                               } else {
-                                       $this->error( "File was not deleted: 
{$file->getPath()}.\n" );
-                               }
-                       }
-                       // Purge current version and any versions in oldimage 
table
-                       $file->purgeCache();
-                       $file->purgeHistory();
-                       // Purge items from fileachive table (rows are likely 
here)
-                       $this->purgeFromArchiveTable( $repo, $file );
-
-                       $this->output( "Purged file {$row->log_title}; deleted 
on {$row->log_timestamp}.\n" );
-               }
-       }
-
-       protected function purgeFromArchiveTable( LocalRepo $repo, LocalFile 
$file ) {
-               $db = $repo->getSlaveDB();
-               $res = $db->select( 'filearchive',
-                       array( 'fa_archive_name' ),
-                       array( 'fa_name' => $file->getName() ),
-                       __METHOD__
-               );
-               foreach ( $res as $row ) {
-                       if ( $row->fa_archive_name === null ) {
-                               continue; // was not an old version (current 
version names checked already)
-                       }
-                       $ofile = $repo->newFromArchiveName( $file->getTitle(), 
$row->fa_archive_name );
-                       // If there is an orphaned storage file still 
there...delete it
-                       if ( !$file->exists() && $repo->fileExists( 
$ofile->getPath() ) ) {
-                               $dpath = $this->getDeletedPath( $repo, $ofile );
-                               if ( $repo->fileExists( $dpath ) ) { // sanity 
check to avoid data loss
-                                       $repo->getBackend()->delete( array( 
'src' => $ofile->getPath() ) );
-                                       $this->output( "Deleted orphan file: 
{$ofile->getPath()}.\n" );
-                               } else {
-                                       $this->error( "File was not deleted: 
{$ofile->getPath()}.\n" );
-                               }
-                       }
-                       $file->purgeOldThumbnails( $row->fa_archive_name );
-               }
-       }
-
-       protected function getDeletedPath( LocalRepo $repo, LocalFile $file ) {
-               $hash = $repo->getFileSha1( $file->getPath() );
-               $key = "{$hash}.{$file->getExtension()}";
-               return $repo->getDeletedHashPath( $key ) . $key;
-       }
-}
-
-$maintClass = "PurgeDeletedFiles";
-require_once RUN_MAINTENANCE_IF_MAIN;

-- 
To view, visit https://gerrit.wikimedia.org/r/87635
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7181ea461ba9758747bff4fd70530d00dab492cc
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: BryanDavis <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to