https://www.mediawiki.org/wiki/Special:Code/MediaWiki/113704

Revision: 113704
Author:   aaron
Date:     2012-03-13 01:46:33 +0000 (Tue, 13 Mar 2012)
Log Message:
-----------
[FileBackend]
* Added FileJournal class to log file changes for file backends. This can be 
used for migrations (like moving to Swift), syncing mirror repos, consistency 
checks, finishing/reverting operation batches, and such. The default journal is 
the "null" journal, which simply does nothing.
* Added the optional schema change required for using the DBFileJournal (MySQL, 
SQLite).

Modified Paths:
--------------
    trunk/phase3/includes/AutoLoader.php
    trunk/phase3/includes/filerepo/backend/FileBackend.php
    trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
    trunk/phase3/includes/filerepo/backend/FileBackendStore.php
    trunk/phase3/includes/filerepo/backend/FileOp.php
    trunk/phase3/languages/messages/MessagesEn.php
    trunk/phase3/maintenance/language/messages.inc

Added Paths:
-----------
    trunk/phase3/includes/filerepo/backend/filejournal/
    trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
    trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
    trunk/phase3/maintenance/archives/patch-filejournal.sql

Modified: trunk/phase3/includes/AutoLoader.php
===================================================================
--- trunk/phase3/includes/AutoLoader.php        2012-03-13 01:44:39 UTC (rev 
113703)
+++ trunk/phase3/includes/AutoLoader.php        2012-03-13 01:46:33 UTC (rev 
113704)
@@ -507,6 +507,9 @@
        'FSFileBackendFileList' => 
'includes/filerepo/backend/FSFileBackend.php',
        'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
        'SwiftFileBackendFileList' => 
'includes/filerepo/backend/SwiftFileBackend.php',
+       'FileJournal' => 
'includes/filerepo/backend/filejournal/FileJournal.php',
+       'DBFileJournal' => 
'includes/filerepo/backend/filejournal/DBFileJournal.php',
+       'NullFileJournal' => 
'includes/filerepo/backend/filejournal/FileJournal.php',
        'LockManagerGroup' => 
'includes/filerepo/backend/lockmanager/LockManagerGroup.php',
        'LockManager' => 
'includes/filerepo/backend/lockmanager/LockManager.php',
        'ScopedLock' => 'includes/filerepo/backend/lockmanager/LockManager.php',

Modified: trunk/phase3/includes/filerepo/backend/FileBackend.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/FileBackend.php      2012-03-13 
01:44:39 UTC (rev 113703)
+++ trunk/phase3/includes/filerepo/backend/FileBackend.php      2012-03-13 
01:46:33 UTC (rev 113704)
@@ -45,6 +45,8 @@
        protected $readOnly; // string; read-only explanation message
        /** @var LockManager */
        protected $lockManager;
+       /** @var FileJournal */
+       protected $fileJournal;
 
        /**
         * Create a new backend instance from configuration.
@@ -73,6 +75,9 @@
                $this->lockManager = ( $config['lockManager'] instanceof 
LockManager )
                        ? $config['lockManager']
                        : LockManagerGroup::singleton()->get( 
$config['lockManager'] );
+               $this->fileJournal = isset( $config['fileJournal'] )
+                       ? FileJournal::factory( $config['fileJournal'], 
$this->name )
+                       : FileJournal::factory( array( 'class' => 
'NullFileJournal' ), $this->name );
                $this->readOnly = isset( $config['readOnly'] )
                        ? (string)$config['readOnly']
                        : '';
@@ -177,6 +182,8 @@
         * 'allowStale'          : Don't require the latest available data.
         *                         This can increase performance for 
non-critical writes.
         *                         This has no effect unless the 'force' flag 
is set.
+        * 'nonJournaled'        : Don't log this operation batch in the file 
journal.
+        *                         This limits the ability of recovery scripts.
         * 
         * Remarks on locking:
         * File system paths given to operations should refer to files that are

Modified: trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php    
2012-03-13 01:44:39 UTC (rev 113703)
+++ trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php    
2012-03-13 01:46:33 UTC (rev 113704)
@@ -133,7 +133,7 @@
                }
 
                // Actually attempt the operation batch...
-               $subStatus = FileOp::attemptBatch( $performOps, $opts );
+               $subStatus = FileOp::attemptBatch( $performOps, $opts, 
$this->fileJournal );
 
                $success = array();
                $failCount = 0;

Modified: trunk/phase3/includes/filerepo/backend/FileBackendStore.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/FileBackendStore.php 2012-03-13 
01:44:39 UTC (rev 113703)
+++ trunk/phase3/includes/filerepo/backend/FileBackendStore.php 2012-03-13 
01:46:33 UTC (rev 113704)
@@ -708,7 +708,7 @@
                $this->clearCache();
 
                // Actually attempt the operation batch...
-               $subStatus = FileOp::attemptBatch( $performOps, $opts );
+               $subStatus = FileOp::attemptBatch( $performOps, $opts, 
$this->fileJournal );
 
                // Merge errors into status fields
                $status->merge( $subStatus );

Modified: trunk/phase3/includes/filerepo/backend/FileOp.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/FileOp.php   2012-03-13 01:44:39 UTC 
(rev 113703)
+++ trunk/phase3/includes/filerepo/backend/FileOp.php   2012-03-13 01:46:33 UTC 
(rev 113704)
@@ -24,6 +24,7 @@
        protected $state = self::STATE_NEW; // integer
        protected $failed = false; // boolean
        protected $useLatest = true; // boolean
+       protected $batchId; // string
 
        protected $sourceSha1; // string
        protected $destSameAsSource; // boolean
@@ -63,6 +64,16 @@
        }
 
        /**
+        * Set the batch UUID this operation belongs to.
+        *
+        * attemptBatch() calls this on every operation before running its
+        * precheck; the stored ID is included in the debug log line written
+        * when an operation fails.
+        *
+        * @param $batchId string
+        * @return void
+        */
+       final protected function setBatchId( $batchId ) {
+               $this->batchId = $batchId;
+       }
+
+       /**
         * Whether to allow stale data for file reads and stat checks
         *
         * @param $allowStale bool
@@ -73,43 +84,57 @@
        }
 
        /**
-        * Attempt a series of file operations.
+        * Attempt to perform a series of file operations.
         * Callers are responsible for handling file locking.
         * 
         * $opts is an array of options, including:
-        * 'force'      : Errors that would normally cause a rollback do not.
-        *                The remaining operations are still attempted if any 
fail.
-        * 'allowStale' : Don't require the latest available data.
-        *                This can increase performance for non-critical writes.
-        *                This has no effect unless the 'force' flag is set.
-        *
+        * 'force'        : Errors that would normally cause a rollback do not.
+        *                  The remaining operations are still attempted if any 
fail.
+        * 'allowStale'   : Don't require the latest available data.
+        *                  This can increase performance for non-critical 
writes.
+        *                  This has no effect unless the 'force' flag is set.
+        * 'nonJournaled' : Don't log this operation batch in the file journal.
+        * 
         * The resulting Status will be "OK" unless:
         *     a) unexpected operation errors occurred (network partitions, 
disk full...)
         *     b) significant operation errors occurred and 'force' was not set
         * 
         * @param $performOps Array List of FileOp operations
         * @param $opts Array Batch operation options
+        * @param $journal FileJournal Journal to log operations to
         * @return Status 
         */
-       final public static function attemptBatch( array $performOps, array 
$opts ) {
+       final public static function attemptBatch(
+               array $performOps, array $opts, FileJournal $journal
+       ) {
                $status = Status::newGood();
 
-               $allowStale = !empty( $opts['allowStale'] );
-               $ignoreErrors = !empty( $opts['force'] );
-
                $n = count( $performOps );
                if ( $n > self::MAX_BATCH_SIZE ) {
                        $status->fatal( 'backend-fail-batchsize', $n, 
self::MAX_BATCH_SIZE );
                        return $status;
                }
 
+               $batchId = $journal->getTimestampedUUID();
+               $allowStale = !empty( $opts['allowStale'] );
+               $ignoreErrors = !empty( $opts['force'] );
+               $journaled = empty( $opts['nonJournaled'] );
+
+               $entries = array(); // file journal entries
                $predicates = FileOp::newPredicates(); // account for previous 
op in prechecks
                // Do pre-checks for each operation; abort on failure...
                foreach ( $performOps as $index => $fileOp ) {
+                       $fileOp->setBatchId( $batchId );
                        $fileOp->allowStaleReads( $allowStale );
-                       $subStatus = $fileOp->precheck( $predicates );
+                       $oldPredicates = $predicates;
+                       $subStatus = $fileOp->precheck( $predicates ); // 
updates $predicates
                        $status->merge( $subStatus );
-                       if ( !$subStatus->isOK() ) { // operation failed?
+                       if ( $subStatus->isOK() ) {
+                               if ( $journaled ) { // journal log entry
+                                       $entries = array_merge( $entries,
+                                               self::getJournalEntries( 
$fileOp, $oldPredicates, $predicates ) );
+                               }
+                       } else { // operation failed?
                                $status->success[$index] = false;
                                ++$status->failCount;
                                if ( !$ignoreErrors ) {
@@ -118,8 +143,15 @@
                        }
                }
 
-               if ( $ignoreErrors ) {
-                       # Treat all precheck() fatals as merely warnings
+               // Log the operations in file journal...
+               if ( count( $entries ) ) {
+                       $subStatus = $journal->logChangeBatch( $entries, 
$batchId );
+                       if ( !$subStatus->isOK() ) {
+                               return $subStatus; // abort
+                       }
+               }
+
+               if ( $ignoreErrors ) { // treat precheck() fatals as mere 
warnings
                        $status->setResult( true, $status->value );
                }
 
@@ -155,6 +187,46 @@
        }
 
        /**
+        * Build the file journal entries that describe a single file operation.
+        *
+        * Every path the operation reads or changes gets a 'null' entry carrying
+        * the SHA-1 that fileSha1() reports against the pre-op predicates. Each
+        * changed path then gets either a 'delete' entry (post-op SHA-1 is false)
+        * or a 'create'/'update' entry, depending on whether the file existed
+        * according to the pre-op predicates.
+        *
+        * @param $fileOp FileOp
+        * @param $oPredicates Array Pre-op information about files
+        * @param $nPredicates Array Post-op information about files
+        * @return Array Entries ordered as: null, create/update, delete
+        */
+       final protected static function getJournalEntries(
+               FileOp $fileOp, array $oPredicates, array $nPredicates
+       ) {
+               $assertions = array(); // 'null' entries, used as assertions for recovery
+               $writes = array(); // 'create' and 'update' entries
+               $removals = array(); // 'delete' entries
+
+               $touchedPaths = array_merge(
+                       $fileOp->storagePathsRead(),
+                       $fileOp->storagePathsChanged()
+               );
+               foreach ( $touchedPaths as $storagePath ) {
+                       $assertions[] = array(
+                               'op'      => 'null',
+                               'path'    => $storagePath,
+                               'newSha1' => $fileOp->fileSha1( $storagePath, $oPredicates )
+                       );
+               }
+
+               foreach ( $fileOp->storagePathsChanged() as $storagePath ) {
+                       if ( $nPredicates['sha1'][$storagePath] === false ) {
+                               // The file is gone once the operation has run
+                               $removals[] = array(
+                                       'op'      => 'delete',
+                                       'path'    => $storagePath,
+                                       'newSha1' => ''
+                               );
+                               continue;
+                       }
+                       // The file exists afterwards; tell overwrites apart from new files
+                       $existedBefore = $fileOp->fileExists( $storagePath, $oPredicates );
+                       $writes[] = array(
+                               'op'      => $existedBefore ? 'update' : 'create',
+                               'path'    => $storagePath,
+                               'newSha1' => $nPredicates['sha1'][$storagePath]
+                       );
+               }
+
+               return array_merge( $assertions, $writes, $removals );
+       }
+
+       /**
         * Get the value of the parameter with the given name
         * 
         * @param $name string
@@ -352,8 +424,8 @@
                $params = $this->params;
                $params['failedAction'] = $action;
                try {
-                       wfDebugLog( 'FileOperation',
-                               get_class( $this ) . ' failed: ' . 
FormatJson::encode( $params ) );
+                       wfDebugLog( 'FileOperation', get_class( $this ) .
+                               " failed (batch #{$this->batchId}): " . 
FormatJson::encode( $params ) );
                } catch ( Exception $e ) {
                        // bad config? debug log error?
                }

Added: trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php        
                        (rev 0)
+++ trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php        
2012-03-13 01:46:33 UTC (rev 113704)
@@ -0,0 +1,112 @@
+<?php
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * Version of FileJournal that logs to a DB table
+ * @since 1.20
+ */
+class DBFileJournal extends FileJournal {
+       protected $wiki = false; // string; wiki DB name
+       /** @var DatabaseBase|null Master connection, opened on first use and then reused */
+       protected $dbw = null;
+
+       /**
+        * Construct a new instance from configuration.
+        * $config includes:
+        *     'wiki' : wiki name to use for LoadBalancer; when it is not set,
+        *              the property default (false) is left in place
+        *
+        * @param $config Array
+        */
+       protected function __construct( array $config ) {
+               parent::__construct( $config );
+
+               // Only override the default when a wiki was actually configured
+               if ( isset( $config['wiki'] ) ) {
+                       $this->wiki = $config['wiki'];
+               }
+       }
+
+       /**
+        * Insert one row per journal entry into the 'filejournal' table,
+        * all within a single transaction.
+        *
+        * @see FileJournal::logChangeBatch()
+        * @param $entries Array List of entries with 'op', 'path' and 'newSha1' keys
+        * @param $batchId string UUID string that identifies the operation batch
+        * @return Status Fatal if the DB cannot be reached or the insert fails
+        */
+       protected function doLogChangeBatch( array $entries, $batchId ) {
+               $status = Status::newGood();
+
+               $dbw = $this->getMasterDB();
+               if ( !$dbw ) {
+                       $status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+                       return $status;
+               }
+               // All rows of a batch share one timestamp, so format it only once
+               $dbTimestamp = $dbw->timestamp( wfTimestamp( TS_UNIX ) );
+
+               $data = array();
+               foreach ( $entries as $entry ) {
+                       $data[] = array(
+                               'fj_batch_uuid' => $batchId,
+                               'fj_backend'    => $this->backend,
+                               'fj_op'         => $entry['op'],
+                               'fj_path'       => $entry['path'],
+                               'fj_path_sha1'  => wfBaseConvert( sha1( $entry['path'] ), 16, 36, 31 ),
+                               'fj_new_sha1'   => $entry['newSha1'],
+                               'fj_timestamp'  => $dbTimestamp
+                       );
+               }
+
+               try {
+                       $dbw->begin();
+                       $dbw->insert( 'filejournal', $data, __METHOD__ );
+                       $dbw->commit();
+               } catch ( DBError $e ) {
+                       // Do not leave the transaction open on the reused connection
+                       $this->abortTransaction( $dbw );
+                       $status->fatal( 'filejournal-fail-dbquery', $this->backend );
+                       return $status;
+               }
+
+               return $status;
+       }
+
+       /**
+        * Delete journal rows whose timestamp is older than 'ttlDays' days.
+        * Does nothing when 'ttlDays' is false, zero or negative.
+        *
+        * @see FileJournal::purgeOldLogs()
+        * @return Status Fatal if the DB cannot be reached or the delete fails
+        */
+       protected function doPurgeOldLogs() {
+               $status = Status::newGood();
+               if ( $this->ttlDays <= 0 ) {
+                       return $status; // nothing to do
+               }
+
+               $dbw = $this->getMasterDB();
+               if ( !$dbw ) {
+                       $status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+                       return $status;
+               }
+               $dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays );
+
+               try {
+                       $dbw->begin();
+                       $dbw->delete( 'filejournal',
+                               array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ),
+                               __METHOD__
+                       );
+                       $dbw->commit();
+               } catch ( DBError $e ) {
+                       // Do not leave the transaction open on the reused connection
+                       $this->abortTransaction( $dbw );
+                       $status->fatal( 'filejournal-fail-dbquery', $this->backend );
+                       return $status;
+               }
+
+               return $status;
+       }
+
+       /**
+        * Roll back a failed transaction without letting a DB exception escape.
+        * If the rollback itself fails, the cached connection is dropped so
+        * that the next call to getMasterDB() opens a new one.
+        *
+        * @param $dbw DatabaseBase
+        * @return void
+        */
+       protected function abortTransaction( $dbw ) {
+               try {
+                       $dbw->rollback( __METHOD__ );
+               } catch ( DBError $e ) {
+                       $this->dbw = null;
+               }
+       }
+
+       /**
+        * Get a master connection to the logging DB.
+        * The connection comes from its own load balancer; it is opened on
+        * first use and reused by later calls, so that each logged batch
+        * does not create another load balancer and connection.
+        *
+        * @return DatabaseBase|null
+        */
+       protected function getMasterDB() {
+               if ( !$this->dbw ) {
+                       try {
+                               $lb = wfGetLBFactory()->newMainLB();
+                               $this->dbw = $lb->getConnection( DB_MASTER, array(), $this->wiki );
+                       } catch ( DBConnectionError $e ) {
+                               $this->dbw = null;
+                               return null;
+                       }
+               }
+               return $this->dbw;
+       }
+}


Property changes on: 
trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
___________________________________________________________________
Added: svn:eol-style
   + native

Added: trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php          
                (rev 0)
+++ trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php  
2012-03-13 01:46:33 UTC (rev 113704)
@@ -0,0 +1,131 @@
+<?php
+/**
+ * @defgroup FileJournal File journal
+ * @ingroup FileBackend
+ */
+
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * @brief Class for handling file operation journaling.
+ *
+ * Subclasses should avoid throwing exceptions at all costs.
+ *
+ * @ingroup FileJournal
+ * @since 1.20
+ */
+abstract class FileJournal {
+       protected $backend; // string
+       protected $ttlDays; // integer
+
+       /**
+        * Construct a new instance from configuration.
+        * $config includes:
+        *     'ttlDays' : days to keep log entries around (false means "forever")
+        *
+        * @param $config Array
+        */
+       protected function __construct( array $config ) {
+               $this->ttlDays = isset( $config['ttlDays'] ) ? $config['ttlDays'] : false;
+       }
+
+       /**
+        * Create an appropriate FileJournal object from config.
+        * $config must include:
+        *     'class' : name of the FileJournal subclass to instantiate
+        * The whole $config array is passed on to that class' constructor.
+        *
+        * @param $config Array
+        * @param $backend string A registered file backend name
+        * @return FileJournal
+        * @throws MWException If no class is given or it is not a FileJournal
+        */
+       final public static function factory( array $config, $backend ) {
+               if ( !isset( $config['class'] ) ) {
+                       // Fail with a clear message instead of instantiating an undefined name
+                       throw new MWException( "No file journal class given for backend '$backend'." );
+               }
+               $class = $config['class'];
+               $jrn = new $class( $config );
+               if ( !$jrn instanceof self ) {
+                       throw new MWException( "Class given is not an instance of FileJournal." );
+               }
+               $jrn->backend = $backend;
+               return $jrn;
+       }
+
+       /**
+        * Get a statistically unique ID string
+        *
+        * @return string <9 char TS_MW timestamp in base 36><22 random base 36 chars>
+        */
+       final public function getTimestampedUUID() {
+               $s = '';
+               for ( $i = 0; $i < 5; $i++ ) {
+                       $s .= mt_rand( 0, 2147483647 );
+               }
+               // Hash the random digits into 31 base 36 chars; only the first 22 are kept below
+               $s = wfBaseConvert( sha1( $s ), 16, 36, 31 );
+               return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 ) . $s, 0, 31 );
+       }
+
+       /**
+        * Log changes made by a batch file operation.
+        * $entries is an array of log entries, each of which contains:
+        *     op      : Basic operation name. FileOp::getJournalEntries() emits
+        *               'null' (pre-operation assertion for recovery), 'create',
+        *               'update' and 'delete'.
+        *     path    : The storage path of the file
+        *     newSha1 : The final base 36 SHA-1 of the file
+        * Note that 'false' should be used as the SHA-1 for non-existing files
+        * (FileOp::getJournalEntries() passes '' for its 'delete' entries).
+        * An empty $entries list is reported as success without calling
+        * doLogChangeBatch().
+        *
+        * @param $entries Array List of file operations (each an array of parameters)
+        * @param $batchId string UUID string that identifies the operation batch
+        * @return Status
+        */
+       final public function logChangeBatch( array $entries, $batchId ) {
+               if ( !count( $entries ) ) {
+                       return Status::newGood();
+               }
+               return $this->doLogChangeBatch( $entries, $batchId );
+       }
+
+       /**
+        * @see FileJournal::logChangeBatch()
+        *
+        * @param $entries Array List of file operations (each an array of parameters)
+        * @param $batchId string UUID string that identifies the operation batch
+        * @return Status
+        */
+       abstract protected function doLogChangeBatch( array $entries, $batchId );
+
+       /**
+        * Purge any old log entries
+        *
+        * @return Status
+        */
+       final public function purgeOldLogs() {
+               return $this->doPurgeOldLogs();
+       }
+
+       /**
+        * @see FileJournal::purgeOldLogs()
+        * @return Status
+        */
+       abstract protected function doPurgeOldLogs();
+}
+
+/**
+ * Simple version of FileJournal that does nothing.
+ * Both hooks report success without recording or deleting anything.
+ * @since 1.20
+ */
+class NullFileJournal extends FileJournal {
+       /**
+        * No-op implementation: the entries are ignored.
+        *
+        * @see FileJournal::logChangeBatch()
+        * @return Status Always a "good" status
+        */
+       protected function doLogChangeBatch( array $entries, $batchId ) {
+               return Status::newGood();
+       }
+
+       /**
+        * No-op implementation: nothing is purged.
+        *
+        * @see FileJournal::purgeOldLogs()
+        * @return Status Always a "good" status
+        */
+       protected function doPurgeOldLogs() {
+               return Status::newGood();
+       }
+}


Property changes on: 
trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
___________________________________________________________________
Added: svn:eol-style
   + native

Modified: trunk/phase3/languages/messages/MessagesEn.php
===================================================================
--- trunk/phase3/languages/messages/MessagesEn.php      2012-03-13 01:44:39 UTC 
(rev 113703)
+++ trunk/phase3/languages/messages/MessagesEn.php      2012-03-13 01:46:33 UTC 
(rev 113704)
@@ -2274,6 +2274,10 @@
 'backend-fail-contenttype'   => 'Could not determine the content type of the 
file to store at "$1".',
 'backend-fail-batchsize'     => 'Storage backend given a batch of $1 file 
{{PLURAL:$1|operation|operations}}; the limit is $2 
{{PLURAL:$2|operation|operations}}.',
 
+# File journal
+'filejournal-fail-dbconnect' => 'Could not connect to the journal database for 
storage backend "$1".',
+'filejournal-fail-dbquery'   => 'Could not update the journal database for 
storage backend "$1".',
+
 # Lock manager
 'lockmanager-notlocked'        => 'Could not unlock "$1"; it is not locked.',
 'lockmanager-fail-closelock'   => 'Could not close lock file for "$1".',

Added: trunk/phase3/maintenance/archives/patch-filejournal.sql
===================================================================
--- trunk/phase3/maintenance/archives/patch-filejournal.sql                     
        (rev 0)
+++ trunk/phase3/maintenance/archives/patch-filejournal.sql     2012-03-13 
01:46:33 UTC (rev 113704)
@@ -0,0 +1,24 @@
+-- File backend operation journal (one row per journal entry written by DBFileJournal)
+CREATE TABLE /*_*/filejournal (
+  -- Unique ID for each file operation
+  fj_id bigint unsigned NOT NULL PRIMARY KEY auto_increment,
+  -- UUID of the batch this operation belongs to
+  fj_batch_uuid varbinary(32) NOT NULL,
+  -- The registered file backend name
+  fj_backend varchar(255) NOT NULL,
+  -- The storage path that was affected (may be internal paths)
+  fj_path blob NOT NULL,
+  -- SHA-1 file path hash in base-36
+  fj_path_sha1 varbinary(32) NOT NULL default '',
+  -- Primitive operation description (null/create/update/delete)
+  fj_op varchar(16) NOT NULL default '',
+  -- SHA-1 file content hash in base-36 (empty string for 'delete' entries)
+  fj_new_sha1 varbinary(32) NOT NULL default '',
+  -- Timestamp of the batch operation
+  fj_timestamp varbinary(14) NOT NULL default ''
+);
+
+CREATE INDEX /*i*/fj_batch_id ON /*_*/filejournal (fj_batch_uuid,fj_id);
+CREATE INDEX /*i*/fj_path_id ON /*_*/filejournal (fj_path_sha1,fj_id);
+CREATE INDEX /*i*/fj_new_sha1 ON /*_*/filejournal (fj_new_sha1,fj_id);
+CREATE INDEX /*i*/fj_timestamp ON /*_*/filejournal (fj_timestamp);


Property changes on: trunk/phase3/maintenance/archives/patch-filejournal.sql
___________________________________________________________________
Added: svn:eol-style
   + native

Modified: trunk/phase3/maintenance/language/messages.inc
===================================================================
--- trunk/phase3/maintenance/language/messages.inc      2012-03-13 01:44:39 UTC 
(rev 113703)
+++ trunk/phase3/maintenance/language/messages.inc      2012-03-13 01:46:33 UTC 
(rev 113704)
@@ -1377,6 +1377,11 @@
                'backend-fail-batchsize'
        ),
 
+       'filejournal-errors' => array(
+               'filejournal-fail-dbconnect',
+               'filejournal-fail-dbquery'
+       ),
+
        'lockmanager-errors' => array(
                'lockmanager-notlocked',
                'lockmanager-fail-closelock',


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to