https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114411

Revision: 114411
Author:   aaron
Date:     2012-03-21 21:42:16 +0000 (Wed, 21 Mar 2012)
Log Message:
-----------
Added some live cleanup scripts

Added Paths:
-----------
    branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php
    branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php

Added: branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php
===================================================================
--- branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php                
                (rev 0)
+++ branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php        
2012-03-21 21:42:16 UTC (rev 114411)
@@ -0,0 +1,78 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+/**
+ * Maintenance script that lists archived file versions which exist on the
+ * local filesystem but have no matching row in the oldimage table.
+ *
+ * Only archive names starting with "2012" are considered. Every orphan is
+ * printed, and, when --outdir is given, appended to "<outdir>/<wiki DB name>".
+ *
+ * @ingroup Maintenance
+ */
+class FindFilesMissingDBRows extends Maintenance {
+       /** Maximum number of archive names looked up per DB query */
+       const BATCH_SIZE = 500;
+
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Find archived files on the filesystem that have no oldimage DB record.";
+               $this->addOption( 'outdir', "List bad files under this dir.", false, true );
+       }
+
+       /**
+        * Walk the 256 archive shard directories and report every archived
+        * file name that has no oldimage row.
+        */
+       public function execute() {
+               global $wgUploadDirectory, $wgDBname; // assumes FS
+
+               if ( !is_dir( "{$wgUploadDirectory}/archive" ) ) {
+                       return; // no archive directory, nothing to scan
+               }
+
+               $outDir = $this->getOption( 'outdir' );
+               $outFile = null;
+               if ( $outDir ) {
+                       $outFile = "$outDir/$wgDBname";
+                       // Never append to the list of an earlier run
+                       if ( file_exists( $outFile ) ) {
+                               $this->error( "$outFile already exists", 1 );
+                       }
+                       // file_put_contents() does not create missing directories
+                       if ( !wfMkdirParents( $outDir ) ) {
+                               $this->error( "Could not create directory $outDir", 1 );
+                       }
+               }
+
+               $dbr = wfGetDB( DB_SLAVE );
+               for ( $i = 0; $i < 256; $i++ ) { // 16*16=256 shards
+                       $shard = wfBaseConvert( $i, 10, 16, 2 );
+                       $shardDir = "{$wgUploadDirectory}/archive/{$shard[0]}/{$shard}";
+                       if ( !is_dir( $shardDir ) ) {
+                               continue;
+                       }
+                       $this->output( "Doing shard $shard.\n" );
+
+                       $fsList = $this->listArchiveNames( $shardDir );
+                       // Query in batches so the IN() list stays bounded
+                       foreach ( array_chunk( $fsList, self::BATCH_SIZE ) as $batch ) {
+                               $res = $dbr->select( 'oldimage', array( 'oi_archive_name' ),
+                                       array( 'oi_archive_name' => $batch ), __METHOD__
+                               );
+                               $dbList = array();
+                               foreach ( $res as $row ) {
+                                       $dbList[$row->oi_archive_name] = 1;
+                               }
+                               foreach ( $batch as $archiveName ) {
+                                       if ( isset( $dbList[$archiveName] ) ) {
+                                               continue; // has a DB record
+                                       }
+                                       $this->output( "No DB record for file $archiveName.\n" );
+                                       if ( $outFile !== null &&
+                                               file_put_contents( $outFile, "$archiveName\n", FILE_APPEND ) === false )
+                                       {
+                                               $this->error( "Could not write to $outFile", 1 );
+                                       }
+                               }
+                       }
+               }
+       }
+
+       /**
+        * List the base names of everything under a shard directory whose
+        * name starts with "2012" (we really only need feb-march).
+        *
+        * @param $shardDir string Filesystem path of the shard directory
+        * @return array List of archive names; empty if find printed nothing
+        */
+       private function listArchiveNames( $shardDir ) {
+               // Escape the path: it may contain spaces or shell metacharacters
+               $found = shell_exec( 'find ' . escapeshellarg( $shardDir ) . ' -name "2012*"' );
+               if ( !is_string( $found ) ) {
+                       return array(); // command failed or printed nothing
+               }
+               // array_filter() drops the empty entry after the trailing newline
+               return array_values( array_filter(
+                       array_map( 'wfBaseName', explode( "\n", $found ) )
+               ) );
+       }
+}
+
+$maintClass = "FindFilesMissingDBRows";
+require_once( RUN_MAINTENANCE_IF_MAIN );

Added: branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php
===================================================================
--- branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php                      
        (rev 0)
+++ branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php      2012-03-21 
21:42:16 UTC (rev 114411)
@@ -0,0 +1,148 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+/**
+ * Maintenance script that repairs files hit by bug 35048, using the list of
+ * orphaned archive names read from "<indir>/<wiki DB name>".
+ *
+ * For each orphan it may switch the current and archived files on the
+ * filesystem, and it fills in empty oi_archive_name values on oldimage rows
+ * whose sha1 matches the orphaned file. Filesystem changes are logged to
+ * "<outdir>/<wiki DB name>" as "<source> <destination>" lines.
+ *
+ * @ingroup Maintenance
+ */
+class FixBug35048Files extends Maintenance {
+       public function __construct() {
+               parent::__construct();
+               $this->mDescription = "Fix the image/oldimage DB records for a file.";
+               $this->addOption( 'indir', "Dir containing orphaned archive file lists", true, true );
+               $this->addOption( 'outdir', "Log changes to files under this dir", true, true );
+       }
+
+       /**
+        * Process every archive name listed in the input file for this wiki.
+        */
+       public function execute() {
+               global $wgUploadDirectory, $wgDBname; // assumes FS
+
+               $inDir = $this->getOption( 'indir' );
+               if ( !is_file( "$inDir/$wgDBname" ) ) {
+                       $this->error( "$inDir/$wgDBname does not exist", 1 );
+               }
+
+               $outDir = $this->getOption( 'outdir' );
+               $logFile = "$outDir/$wgDBname";
+               if ( file_exists( $logFile ) ) {
+                       $this->error( "$logFile already exists", 1 );
+               }
+
+               // Fail loudly rather than returning with a success status
+               $lostFoundDir = "$wgUploadDirectory/lost+found";
+               if ( !wfMkdirParents( $lostFoundDir ) ) {
+                       $this->error( "Could not create directory $lostFoundDir", 1 );
+               }
+               if ( !wfMkdirParents( $outDir ) ) {
+                       $this->error( "Could not create directory $outDir", 1 );
+               }
+
+               $dbw = wfGetDB( DB_MASTER );
+               $repo = RepoGroup::singleton()->getLocalRepo();
+
+               $list = array_filter( explode( "\n", file_get_contents( "$inDir/$wgDBname" ) ) );
+               foreach ( $list as $archiveName ) {
+                       // Archive names look like "<timestamp>!<file name>"
+                       $parts = explode( '!', $archiveName, 2 );
+                       if ( count( $parts ) < 2 || $parts[1] === '' ) {
+                               $this->error( "Skipping malformed archive name '$archiveName'." );
+                               continue;
+                       }
+                       $name = $parts[1];
+                       $title = Title::makeTitle( NS_FILE, $name );
+
+                       $oaFile = $repo->newFromArchiveName( $title, $archiveName );
+                       if ( $oaFile && $oaFile->exists() ) {
+                               continue; // fixed already
+                       }
+
+                       $file = $repo->findFile( $title );
+                       if ( !$file || !$file->exists() ) {
+                               continue; // no current version to compare against
+                       }
+
+                       $currentFilePath = "$wgUploadDirectory/" . $file->getRel();
+                       $archivedFilePath = "$wgUploadDirectory/" . $file->getArchiveRel( $archiveName );
+                       if ( !is_file( $currentFilePath ) || !is_file( $archivedFilePath ) ) {
+                               $this->error( "Failed sanity check for file existence for '$name'." );
+                               continue;
+                       }
+                       $archivedFileSha1 = wfBaseConvert( sha1_file( $archivedFilePath ), 16, 36, 31 );
+                       $currentFileSha1 = $repo->getFileSha1( $file->getPath() ); // base 36
+                       $rowSha1 = $file->getSha1(); // sha1 according to the current file row
+
+                       $history = $file->getHistory(); // old file versions
+
+                       #$this->output( "Checking '$archiveName', sha1 $archivedFileSha1.\n" );
+                       if ( $rowSha1 !== $currentFileSha1 ) { // mismatch?
+                               $this->output( "'$name' gives sha1 {$rowSha1}, not $currentFileSha1.\n" );
+                       }
+                       if (
+                               // Current file row is pointing to the wrong file
+                               $rowSha1 !== $currentFileSha1 &&
+                               // Current file row matches this orphaned file
+                               $rowSha1 === $archivedFileSha1
+                       ) {
+                               // We often have two rows where the current row points to the wrong file
+                               // and the old row points to none at all. Often, from the sha1s, the orphan
+                               // and current files should have their names swapped and the old row should
+                               // point to the current file (which we move to an archive name).
+                               if ( count( $history ) && $history[0]->getArchiveName() === ''
+                                       && $history[0]->getSha1() === $currentFileSha1 )
+                               {
+                                       // Switch the current version and the orphaned version on the FS.
+                                       $this->output( "Switching '$currentFilePath' with '$archivedFilePath'.\n" );
+                                       $tmpFilePath = "$lostFoundDir/" . md5( $currentFilePath );
+                                       if ( !is_writable( $currentFilePath ) || !is_writable( $archivedFilePath ) ) {
+                                               $this->error( "Cannot switch files for '$name': not writable." );
+                                               continue;
+                                       }
+                                       if ( !$this->swapFiles( $currentFilePath, $archivedFilePath, $tmpFilePath, $logFile ) ) {
+                                               // FS state is uncertain; leave the DB rows of this file alone
+                                               continue;
+                                       }
+                                       // Update DB to point to former current version next run (in $history loop)...
+                               } else {
+                                       // Evict the current version to lost+found so it can be properly
+                                       // re-uploaded later, with the username, comment, and log entry.
+                                       // NOTE: the renames and their log lines below are disabled, so this
+                                       // branch only prints what would be done.
+                                       $this->output( "Evicting '$currentFilePath' to '$wgUploadDirectory/lost+found/$name'.\n" );
+                                       #rename( $currentFilePath, "$wgUploadDirectory/lost+found/" . $file->getName() );
+                                       // Restore the orphaned archived version to current version file
+                                       // name so that it matches up with the current version metadata.
+                                       $this->output( "Moving '$archivedFilePath' back to '$currentFilePath'.\n" );
+                                       #rename( $archivedFilePath, $currentFilePath );
+                                       #file_put_contents( "$outDir/$wgDBname", "$currentFilePath $wgUploadDirectory/lost+found/$name\n", FILE_APPEND );
+                                       #file_put_contents( "$outDir/$wgDBname", "$archivedFilePath $currentFilePath\n", FILE_APPEND );
+                               }
+                       }
+                       $this->fixEmptyArchiveNames( $dbw, $history, $name, $archiveName, $archivedFileSha1 );
+               }
+       }
+
+       /**
+        * Exchange two files on the filesystem by way of a temporary path.
+        *
+        * Each rename is appended to the log only after it succeeded, so the
+        * log reflects what really happened. The sequence stops at the first
+        * failure; nothing is rolled back automatically.
+        *
+        * @param $currentFilePath string Path of the current version
+        * @param $archivedFilePath string Path of the orphaned archived version
+        * @param $tmpFilePath string Unused path that parks the current version
+        * @param $logFile string File to append "<source> <destination>" lines to
+        * @return bool True if all three renames succeeded
+        */
+       private function swapFiles( $currentFilePath, $archivedFilePath, $tmpFilePath, $logFile ) {
+               $moves = array(
+                       array( $currentFilePath, $tmpFilePath ), // temp
+                       array( $archivedFilePath, $currentFilePath ),
+                       array( $tmpFilePath, $archivedFilePath ),
+               );
+               foreach ( $moves as $move ) {
+                       list( $src, $dst ) = $move;
+                       if ( !rename( $src, $dst ) ) {
+                               $this->error( "Failed to rename '$src' to '$dst'; check this file manually." );
+                               return false;
+                       }
+                       file_put_contents( $logFile, "$src $dst\n", FILE_APPEND ); // log changes
+               }
+               return true;
+       }
+
+       /**
+        * Fix old versions with empty oi_archive_name but with oi_sha1 pointing
+        * to the orphaned file. This can happen if is_file() fails in the
+        * File:publish() function.
+        *
+        * @param $dbw DatabaseBase Master DB connection
+        * @param $history array Old file versions of the file
+        * @param $name string File name (oi_name)
+        * @param $archiveName string Archive name to store in the broken rows
+        * @param $archivedFileSha1 string Base 36 sha1 of the orphaned file
+        */
+       private function fixEmptyArchiveNames( $dbw, $history, $name, $archiveName, $archivedFileSha1 ) {
+               foreach ( $history as $oldFile ) {
+                       if ( $oldFile->getArchiveName() !== '' ) {
+                               continue; // not a broken row
+                       }
+                       if ( $oldFile->getSha1() !== $archivedFileSha1 ) {
+                               continue; // should not have this name
+                       }
+                       $this->output( "Fixed empty oi_archive_name via sha1 $archivedFileSha1.\n" );
+                       $dbw->update( 'oldimage', array( 'oi_archive_name' => $archiveName ),
+                               array(
+                                       'oi_name'         => $name,
+                                       'oi_sha1'         => $archivedFileSha1,
+                                       'oi_timestamp'    => $dbw->timestamp( $oldFile->getTimestamp() ),
+                                       'oi_archive_name' => '' // sanity
+                               ),
+                               __METHOD__
+                       );
+               }
+       }
+}
+
+$maintClass = "FixBug35048Files";
+require_once( RUN_MAINTENANCE_IF_MAIN );


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to