https://www.mediawiki.org/wiki/Special:Code/MediaWiki/113704
Revision: 113704
Author: aaron
Date: 2012-03-13 01:46:33 +0000 (Tue, 13 Mar 2012)
Log Message:
-----------
[FileBackend]
* Added FileJournal class to log file changes for file backends. This can be
used for migrations (like moving to Swift), syncing mirror repos, consistency
checks, finishing/reverting operation batches, and such. The default journal is
the "null" journal, which simply does nothing.
* Added the optional schema change required for using the DBFileJournal (MySQL,
SQLite).
Modified Paths:
--------------
trunk/phase3/includes/AutoLoader.php
trunk/phase3/includes/filerepo/backend/FileBackend.php
trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
trunk/phase3/includes/filerepo/backend/FileBackendStore.php
trunk/phase3/includes/filerepo/backend/FileOp.php
trunk/phase3/languages/messages/MessagesEn.php
trunk/phase3/maintenance/language/messages.inc
Added Paths:
-----------
trunk/phase3/includes/filerepo/backend/filejournal/
trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
trunk/phase3/maintenance/archives/patch-filejournal.sql
Modified: trunk/phase3/includes/AutoLoader.php
===================================================================
--- trunk/phase3/includes/AutoLoader.php 2012-03-13 01:44:39 UTC (rev
113703)
+++ trunk/phase3/includes/AutoLoader.php 2012-03-13 01:46:33 UTC (rev
113704)
@@ -507,6 +507,9 @@
'FSFileBackendFileList' =>
'includes/filerepo/backend/FSFileBackend.php',
'SwiftFileBackend' => 'includes/filerepo/backend/SwiftFileBackend.php',
'SwiftFileBackendFileList' =>
'includes/filerepo/backend/SwiftFileBackend.php',
+ 'FileJournal' =>
'includes/filerepo/backend/filejournal/FileJournal.php',
+ 'DBFileJournal' =>
'includes/filerepo/backend/filejournal/DBFileJournal.php',
+ 'NullFileJournal' =>
'includes/filerepo/backend/filejournal/FileJournal.php',
'LockManagerGroup' =>
'includes/filerepo/backend/lockmanager/LockManagerGroup.php',
'LockManager' =>
'includes/filerepo/backend/lockmanager/LockManager.php',
'ScopedLock' => 'includes/filerepo/backend/lockmanager/LockManager.php',
Modified: trunk/phase3/includes/filerepo/backend/FileBackend.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/FileBackend.php 2012-03-13
01:44:39 UTC (rev 113703)
+++ trunk/phase3/includes/filerepo/backend/FileBackend.php 2012-03-13
01:46:33 UTC (rev 113704)
@@ -45,6 +45,8 @@
protected $readOnly; // string; read-only explanation message
/** @var LockManager */
protected $lockManager;
+ /** @var FileJournal */
+ protected $fileJournal;
/**
* Create a new backend instance from configuration.
@@ -73,6 +75,9 @@
$this->lockManager = ( $config['lockManager'] instanceof
LockManager )
? $config['lockManager']
: LockManagerGroup::singleton()->get(
$config['lockManager'] );
+ $this->fileJournal = isset( $config['fileJournal'] )
+ ? FileJournal::factory( $config['fileJournal'],
$this->name )
+ : FileJournal::factory( array( 'class' =>
'NullFileJournal' ), $this->name );
$this->readOnly = isset( $config['readOnly'] )
? (string)$config['readOnly']
: '';
@@ -177,6 +182,8 @@
* 'allowStale' : Don't require the latest available data.
* This can increase performance for
non-critical writes.
* This has no effect unless the 'force' flag
is set.
+ * 'nonJournaled' : Don't log this operation batch in the file
journal.
+ * This limits the ability of recovery scripts.
*
* Remarks on locking:
* File system paths given to operations should refer to files that are
Modified: trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
2012-03-13 01:44:39 UTC (rev 113703)
+++ trunk/phase3/includes/filerepo/backend/FileBackendMultiWrite.php
2012-03-13 01:46:33 UTC (rev 113704)
@@ -133,7 +133,7 @@
}
// Actually attempt the operation batch...
- $subStatus = FileOp::attemptBatch( $performOps, $opts );
+ $subStatus = FileOp::attemptBatch( $performOps, $opts,
$this->fileJournal );
$success = array();
$failCount = 0;
Modified: trunk/phase3/includes/filerepo/backend/FileBackendStore.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/FileBackendStore.php 2012-03-13
01:44:39 UTC (rev 113703)
+++ trunk/phase3/includes/filerepo/backend/FileBackendStore.php 2012-03-13
01:46:33 UTC (rev 113704)
@@ -708,7 +708,7 @@
$this->clearCache();
// Actually attempt the operation batch...
- $subStatus = FileOp::attemptBatch( $performOps, $opts );
+ $subStatus = FileOp::attemptBatch( $performOps, $opts,
$this->fileJournal );
// Merge errors into status fields
$status->merge( $subStatus );
Modified: trunk/phase3/includes/filerepo/backend/FileOp.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/FileOp.php 2012-03-13 01:44:39 UTC
(rev 113703)
+++ trunk/phase3/includes/filerepo/backend/FileOp.php 2012-03-13 01:46:33 UTC
(rev 113704)
@@ -24,6 +24,7 @@
protected $state = self::STATE_NEW; // integer
protected $failed = false; // boolean
protected $useLatest = true; // boolean
+ protected $batchId; // string
protected $sourceSha1; // string
protected $destSameAsSource; // boolean
@@ -63,6 +64,16 @@
}
/**
+ * Set the batch UUID this operation belongs to
+ *
+ * @param $batchId string Batch UUID (attemptBatch() passes the journal's getTimestampedUUID() value)
+ * @return void
+ */
+ final protected function setBatchId( $batchId ) {
+ $this->batchId = $batchId;
+ }
+
+ /**
* Whether to allow stale data for file reads and stat checks
*
* @param $allowStale bool
@@ -73,43 +84,57 @@
}
/**
- * Attempt a series of file operations.
+ * Attempt to perform a series of file operations.
* Callers are responsible for handling file locking.
*
* $opts is an array of options, including:
- * 'force' : Errors that would normally cause a rollback do not.
- * The remaining operations are still attempted if any
fail.
- * 'allowStale' : Don't require the latest available data.
- * This can increase performance for non-critical writes.
- * This has no effect unless the 'force' flag is set.
- *
+ * 'force' : Errors that would normally cause a rollback do not.
+ * The remaining operations are still attempted if any
fail.
+ * 'allowStale' : Don't require the latest available data.
+ * This can increase performance for non-critical
writes.
+ * This has no effect unless the 'force' flag is set.
+ * 'nonJournaled' : Don't log this operation batch in the file journal.
+ *
* The resulting Status will be "OK" unless:
* a) unexpected operation errors occurred (network partitions,
disk full...)
* b) significant operation errors occured and 'force' was not set
*
* @param $performOps Array List of FileOp operations
* @param $opts Array Batch operation options
+ * @param $journal FileJournal Journal to log operations to
* @return Status
*/
- final public static function attemptBatch( array $performOps, array
$opts ) {
+ final public static function attemptBatch(
+ array $performOps, array $opts, FileJournal $journal
+ ) {
$status = Status::newGood();
- $allowStale = !empty( $opts['allowStale'] );
- $ignoreErrors = !empty( $opts['force'] );
-
$n = count( $performOps );
if ( $n > self::MAX_BATCH_SIZE ) {
$status->fatal( 'backend-fail-batchsize', $n,
self::MAX_BATCH_SIZE );
return $status;
}
+ $batchId = $journal->getTimestampedUUID();
+ $allowStale = !empty( $opts['allowStale'] );
+ $ignoreErrors = !empty( $opts['force'] );
+ $journaled = empty( $opts['nonJournaled'] );
+
+ $entries = array(); // file journal entries
$predicates = FileOp::newPredicates(); // account for previous
op in prechecks
// Do pre-checks for each operation; abort on failure...
foreach ( $performOps as $index => $fileOp ) {
+ $fileOp->setBatchId( $batchId );
$fileOp->allowStaleReads( $allowStale );
- $subStatus = $fileOp->precheck( $predicates );
+ $oldPredicates = $predicates;
+ $subStatus = $fileOp->precheck( $predicates ); //
updates $predicates
$status->merge( $subStatus );
- if ( !$subStatus->isOK() ) { // operation failed?
+ if ( $subStatus->isOK() ) {
+ if ( $journaled ) { // journal log entry
+ $entries = array_merge( $entries,
+ self::getJournalEntries(
$fileOp, $oldPredicates, $predicates ) );
+ }
+ } else { // operation failed?
$status->success[$index] = false;
++$status->failCount;
if ( !$ignoreErrors ) {
@@ -118,8 +143,15 @@
}
}
- if ( $ignoreErrors ) {
- # Treat all precheck() fatals as merely warnings
+ // Log the operations in file journal...
+ if ( count( $entries ) ) {
+ $subStatus = $journal->logChangeBatch( $entries,
$batchId );
+ if ( !$subStatus->isOK() ) {
+ return $subStatus; // abort
+ }
+ }
+
+ if ( $ignoreErrors ) { // treat precheck() fatals as mere
warnings
$status->setResult( true, $status->value );
}
@@ -155,6 +187,46 @@
}
/**
+ * Get the file journal entries for a single file operation
+ *
+ * @param $fileOp FileOp
+ * @param $oPredicates Array Pre-op information about files
+ * @param $nPredicates Array Post-op information about files
+ * @return Array Entries ordered as: 'null' assertions, then create/update, then delete
+ */
+ final protected static function getJournalEntries(
+ FileOp $fileOp, array $oPredicates, array $nPredicates
+ ) {
+ $nullEntries = array();
+ $updateEntries = array();
+ $deleteEntries = array();
+ $pathsUsed = array_merge( $fileOp->storagePathsRead(), $fileOp->storagePathsChanged() );
+ foreach ( array_unique( $pathsUsed ) as $path ) { // the two lists may overlap; assert each path once
+ $nullEntries[] = array( // assertion for recovery
+ 'op' => 'null',
+ 'path' => $path,
+ 'newSha1' => $fileOp->fileSha1( $path, $oPredicates ) // SHA-1 before this operation runs
+ );
+ }
+ foreach ( $fileOp->storagePathsChanged() as $path ) {
+ if ( $nPredicates['sha1'][$path] === false ) { // deleted
+ $deleteEntries[] = array(
+ 'op' => 'delete',
+ 'path' => $path,
+ 'newSha1' => ''
+ );
+ } else { // created/updated
+ $updateEntries[] = array(
+ 'op' => $fileOp->fileExists( $path, $oPredicates ) ? 'update' : 'create',
+ 'path' => $path,
+ 'newSha1' => $nPredicates['sha1'][$path]
+ );
+ }
+ }
+ return array_merge( $nullEntries, $updateEntries, $deleteEntries );
+ }
+
+ /**
* Get the value of the parameter with the given name
*
* @param $name string
@@ -352,8 +424,8 @@
$params = $this->params;
$params['failedAction'] = $action;
try {
- wfDebugLog( 'FileOperation',
- get_class( $this ) . ' failed: ' .
FormatJson::encode( $params ) );
+ wfDebugLog( 'FileOperation', get_class( $this ) .
+ " failed (batch #{$this->batchId}): " .
FormatJson::encode( $params ) );
} catch ( Exception $e ) {
// bad config? debug log error?
}
Added: trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
(rev 0)
+++ trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
2012-03-13 01:46:33 UTC (rev 113704)
@@ -0,0 +1,112 @@
+<?php
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * Version of FileJournal that logs to a DB table
+ * @since 1.20
+ */
+class DBFileJournal extends FileJournal {
+ protected $wiki = false; // string; wiki DB name
+
+ /**
+ * Construct a new instance from configuration.
+ * $config includes:
+ * 'wiki' : wiki name to use for LoadBalancer (optional; defaults to false)
+ *
+ * @param $config Array
+ */
+ protected function __construct( array $config ) {
+ parent::__construct( $config );
+
+ $this->wiki = isset( $config['wiki'] ) ? $config['wiki'] : false; // key may be absent
+ }
+
+ /**
+ * @see FileJournal::logChangeBatch()
+ * @return Status Fatal if the master DB is unavailable or the insert throws DBError
+ */
+ protected function doLogChangeBatch( array $entries, $batchId ) {
+ $status = Status::newGood();
+
+ $dbw = $this->getMasterDB();
+ if ( !$dbw ) {
+ $status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+ return $status;
+ }
+ $now = $dbw->timestamp( wfTimestamp( TS_UNIX ) ); // formatted once; shared by every row of the batch
+
+ $data = array();
+ foreach ( $entries as $entry ) {
+ $data[] = array(
+ 'fj_batch_uuid' => $batchId,
+ 'fj_backend' => $this->backend,
+ 'fj_op' => $entry['op'],
+ 'fj_path' => $entry['path'],
+ 'fj_path_sha1' => wfBaseConvert( sha1( $entry['path'] ), 16, 36, 31 ),
+ 'fj_new_sha1' => $entry['newSha1'],
+ 'fj_timestamp' => $now
+ );
+ }
+
+ try {
+ $dbw->begin();
+ $dbw->insert( 'filejournal', $data, __METHOD__ );
+ $dbw->commit();
+ } catch ( DBError $e ) {
+ $status->fatal( 'filejournal-fail-dbquery', $this->backend );
+ return $status;
+ }
+
+ return $status;
+ }
+
+ /**
+ * @see FileJournal::purgeOldLogs()
+ * @return Status Fatal if the master DB is unavailable or the delete throws DBError
+ */
+ protected function doPurgeOldLogs() {
+ $status = Status::newGood();
+ if ( $this->ttlDays <= 0 ) { // also true for the default of false ("forever")
+ return $status; // nothing to do
+ }
+
+ $dbw = $this->getMasterDB();
+ if ( !$dbw ) {
+ $status->fatal( 'filejournal-fail-dbconnect', $this->backend );
+ return $status;
+ }
+ $dbCutoff = $dbw->timestamp( time() - 86400 * $this->ttlDays );
+
+ try {
+ $dbw->begin();
+ $dbw->delete( 'filejournal',
+ array( 'fj_timestamp < ' . $dbw->addQuotes( $dbCutoff ) ),
+ __METHOD__
+ );
+ $dbw->commit();
+ } catch ( DBError $e ) {
+ $status->fatal( 'filejournal-fail-dbquery', $this->backend );
+ return $status;
+ }
+
+ return $status;
+ }
+
+ /**
+ * Get a master connection to the logging DB
+ *
+ * @return DatabaseBase|null Null if connecting throws DBConnectionError
+ */
+ protected function getMasterDB() {
+ try {
+ $lb = wfGetLBFactory()->newMainLB(); // NOTE(review): asks the factory for a new LB on every call - confirm intended
+ return $lb->getConnection( DB_MASTER, array(), $this->wiki );
+ } catch ( DBConnectionError $e ) {
+ return null;
+ }
+ }
+}
Property changes on:
trunk/phase3/includes/filerepo/backend/filejournal/DBFileJournal.php
___________________________________________________________________
Added: svn:eol-style
+ native
Added: trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
===================================================================
--- trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
(rev 0)
+++ trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
2012-03-13 01:46:33 UTC (rev 113704)
@@ -0,0 +1,131 @@
+<?php
+/**
+ * @defgroup FileJournal File journal
+ * @ingroup FileBackend
+ */
+
+/**
+ * @file
+ * @ingroup FileJournal
+ * @author Aaron Schulz
+ */
+
+/**
+ * @brief Class for handling file operation journaling.
+ *
+ * Subclasses should avoid throwing exceptions at all costs.
+ *
+ * @ingroup FileJournal
+ * @since 1.20
+ */
+abstract class FileJournal {
+ protected $backend; // string; backend name assigned by factory()
+ protected $ttlDays; // integer; days to keep entries (false means "forever")
+
+ /**
+ * Construct a new instance from configuration.
+ * $config includes:
+ * 'ttlDays' : days to keep log entries around (false means
"forever")
+ *
+ * @param $config Array
+ */
+ protected function __construct( array $config ) {
+ $this->ttlDays = isset( $config['ttlDays'] ) ?
$config['ttlDays'] : false;
+ }
+
+ /**
+ * Create an appropriate FileJournal object from config
+ *
+ * @param $config Array Must include 'class' (name of the FileJournal subclass to instantiate)
+ * @param $backend string A registered file backend name
+ * @return FileJournal (an MWException is thrown if 'class' is not a FileJournal)
+ */
+ final public static function factory( array $config, $backend ) {
+ $class = $config['class'];
+ $jrn = new $class( $config );
+ if ( !$jrn instanceof self ) {
+ throw new MWException( "Class given is not an instance
of FileJournal." );
+ }
+ $jrn->backend = $backend;
+ return $jrn;
+ }
+
+ /**
+ * Get a statistically unique ID string
+ *
+ * @return string <9 char TS_MW timestamp in base 36><22 random base 36
chars>
+ */
+ final public function getTimestampedUUID() {
+ $s = '';
+ for ( $i = 0; $i < 5; $i++ ) {
+ $s .= mt_rand( 0, 2147483647 );
+ }
+ $s = wfBaseConvert( sha1( $s ), 16, 36, 31 );
+ return substr( wfBaseConvert( wfTimestamp( TS_MW ), 10, 36, 9 )
. $s, 0, 31 );
+ }
+
+ /**
+ * Log changes made by a batch file operation.
+ * $entries is an array of log entries, each of which contains:
+ * op : Basic operation name (null, create, update, delete)
+ * path : The storage path of the file
+ * newSha1 : The final base 36 SHA-1 of the file
+ * Note that 'false' should be used as the SHA-1 for non-existing files.
+ *
+ * @param $entries Array List of file operations (each an array of
parameters)
+ * @param $batchId string UUID string that identifies the operation
batch
+ * @return Status
+ */
+ final public function logChangeBatch( array $entries, $batchId ) {
+ if ( !count( $entries ) ) {
+ return Status::newGood();
+ }
+ return $this->doLogChangeBatch( $entries, $batchId );
+ }
+
+ /**
+ * @see FileJournal::logChangeBatch()
+ *
+ * @param $entries Array List of file operations (each an array of
parameters)
+ * @param $batchId string UUID string that identifies the operation
batch
+ * @return Status
+ */
+ abstract protected function doLogChangeBatch( array $entries, $batchId
);
+
+ /**
+ * Purge any old log entries
+ *
+ * @return Status
+ */
+ final public function purgeOldLogs() {
+ return $this->doPurgeOldLogs();
+ }
+
+ /**
+ * @see FileJournal::purgeOldLogs()
+ * @return Status
+ */
+ abstract protected function doPurgeOldLogs();
+}
+
+/**
+ * Simple version of FileJournal that does nothing
+ * @since 1.20
+ */
+class NullFileJournal extends FileJournal {
+ /**
+ * @see FileJournal::logChangeBatch()
+ * @return Status Always good; the entries are not recorded anywhere
+ */
+ protected function doLogChangeBatch( array $entries, $batchId ) {
+ return Status::newGood();
+ }
+
+ /**
+ * @see FileJournal::purgeOldLogs()
+ * @return Status Always good; there is nothing to purge
+ */
+ protected function doPurgeOldLogs() {
+ return Status::newGood();
+ }
+}
Property changes on:
trunk/phase3/includes/filerepo/backend/filejournal/FileJournal.php
___________________________________________________________________
Added: svn:eol-style
+ native
Modified: trunk/phase3/languages/messages/MessagesEn.php
===================================================================
--- trunk/phase3/languages/messages/MessagesEn.php 2012-03-13 01:44:39 UTC
(rev 113703)
+++ trunk/phase3/languages/messages/MessagesEn.php 2012-03-13 01:46:33 UTC
(rev 113704)
@@ -2274,6 +2274,10 @@
'backend-fail-contenttype' => 'Could not determine the content type of the
file to store at "$1".',
'backend-fail-batchsize' => 'Storage backend given a batch of $1 file
{{PLURAL:$1|operation|operations}}; the limit is $2
{{PLURAL:$2|operation|operations}}.',
+# File journal
+'filejournal-fail-dbconnect' => 'Could not connect to the journal database for
storage backend "$1".',
+'filejournal-fail-dbquery' => 'Could not update the journal database for
storage backend "$1".',
+
# Lock manager
'lockmanager-notlocked' => 'Could not unlock "$1"; it is not locked.',
'lockmanager-fail-closelock' => 'Could not close lock file for "$1".',
Added: trunk/phase3/maintenance/archives/patch-filejournal.sql
===================================================================
--- trunk/phase3/maintenance/archives/patch-filejournal.sql
(rev 0)
+++ trunk/phase3/maintenance/archives/patch-filejournal.sql 2012-03-13
01:46:33 UTC (rev 113704)
@@ -0,0 +1,24 @@
+-- File backend operation journal
+CREATE TABLE /*_*/filejournal (
+ -- Unique ID for each file operation
+ fj_id bigint unsigned NOT NULL PRIMARY KEY auto_increment,
+ -- UUID of the batch this operation belongs to
+ fj_batch_uuid varbinary(32) NOT NULL,
+ -- The registered file backend name
+ fj_backend varchar(255) NOT NULL,
+ -- The storage path that was affected (may be internal paths)
+ fj_path blob NOT NULL,
+ -- SHA-1 file path hash in base-36
+ fj_path_sha1 varbinary(32) NOT NULL default '',
+ -- Primitive operation description (null/create/update/delete)
+ fj_op varchar(16) NOT NULL default '',
+ -- SHA-1 file content hash in base-36
+ fj_new_sha1 varbinary(32) NOT NULL default '',
+ -- Timestamp of the batch operation
+ fj_timestamp varbinary(14) NOT NULL default ''
+);
+
+CREATE INDEX /*i*/fj_batch_id ON /*_*/filejournal (fj_batch_uuid,fj_id);
+CREATE INDEX /*i*/fj_path_id ON /*_*/filejournal (fj_path_sha1,fj_id);
+CREATE INDEX /*i*/fj_new_sha1 ON /*_*/filejournal (fj_new_sha1,fj_id);
+CREATE INDEX /*i*/fj_timestamp ON /*_*/filejournal (fj_timestamp);
Property changes on: trunk/phase3/maintenance/archives/patch-filejournal.sql
___________________________________________________________________
Added: svn:eol-style
+ native
Modified: trunk/phase3/maintenance/language/messages.inc
===================================================================
--- trunk/phase3/maintenance/language/messages.inc 2012-03-13 01:44:39 UTC
(rev 113703)
+++ trunk/phase3/maintenance/language/messages.inc 2012-03-13 01:46:33 UTC
(rev 113704)
@@ -1377,6 +1377,11 @@
'backend-fail-batchsize'
),
+ 'filejournal-errors' => array(
+ 'filejournal-fail-dbconnect',
+ 'filejournal-fail-dbquery'
+ ),
+
'lockmanager-errors' => array(
'lockmanager-notlocked',
'lockmanager-fail-closelock',
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs