https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114411
Revision: 114411
Author: aaron
Date: 2012-03-21 21:42:16 +0000 (Wed, 21 Mar 2012)
Log Message:
-----------
Added some live cleanup scripts
Added Paths:
-----------
branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php
branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php
Added: branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php
===================================================================
--- branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php
(rev 0)
+++ branches/wmf/1.19wmf1/maintenance/FindFilesMissingDBRows.php
2012-03-21 21:42:16 UTC (rev 114411)
@@ -0,0 +1,78 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+/**
+ * Maintenance script that reports archived files present on disk that have
+ * no oldimage row with a matching oi_archive_name.
+ *
+ * It walks the 256 two-hex-digit shard directories below
+ * $wgUploadDirectory/archive, lists entries whose name starts with "2012",
+ * and checks each name against the oldimage table. Missing names are
+ * printed and, when --outdir is given, appended to <outdir>/<wiki DB name>.
+ */
+class FindFilesMissingDBRows extends Maintenance {
+	public function __construct() {
+		parent::__construct();
+		$this->mDescription = "Find archived files (from 2012) that have no oldimage DB record.";
+		$this->addOption( 'outdir', "List bad files under this dir.", false, true );
+	}
+
+	public function execute() {
+		global $wgUploadDirectory, $wgDBname; // assumes FS
+
+		// Nothing to scan if this wiki has no archive directory at all.
+		if ( !is_dir( "{$wgUploadDirectory}/archive" ) ) {
+			return;
+		}
+
+		// Refuse to append to a list left over from an earlier run.
+		$outDir = $this->getOption( 'outdir' );
+		if ( $outDir && file_exists( "$outDir/$wgDBname" ) ) {
+			$this->error( "$outDir/$wgDBname already exists", 1 );
+		}
+
+		$dbr = wfGetDB( DB_SLAVE );
+		for ( $i = 0; $i < 256; $i++ ) { // 16*16=256 shards
+			$shard = wfBaseConvert( $i, 10, 16, 2 );
+			$shardDir = "{$wgUploadDirectory}/archive/{$shard[0]}/{$shard}";
+			if ( !is_dir( $shardDir ) ) {
+				continue;
+			}
+			$this->output( "Doing shard $shard.\n" );
+
+			// Files only from 2012 (we really only need feb-march).
+			// The directory is escaped so that spaces or shell metacharacters
+			// in the configured upload path cannot alter the command.
+			$findCmd = 'find ' . wfEscapeShellArg( $shardDir ) . ' -name "2012*"';
+			$fsList = array_filter( array_map( 'wfBaseName',
+				explode( "\n", shell_exec( $findCmd ) )
+			) );
+			if ( !$fsList ) {
+				continue;
+			}
+
+			// Collect the archive names the DB knows about, keyed for O(1) lookup.
+			$res = $dbr->select( 'oldimage',
+				array( 'oi_archive_name' ),
+				array( 'oi_archive_name' => $fsList ),
+				__METHOD__
+			);
+			$dbList = array();
+			foreach ( $res as $row ) {
+				$dbList[$row->oi_archive_name] = 1;
+			}
+
+			foreach ( $fsList as $archiveName ) {
+				if ( isset( $dbList[$archiveName] ) ) {
+					continue;
+				}
+				$this->output( "No DB record for file $archiveName.\n" );
+				if ( $outDir ) {
+					file_put_contents( "$outDir/$wgDBname", "$archiveName\n", FILE_APPEND );
+				}
+			}
+		}
+	}
+}
+
+// Record the name of the class defined above in $maintClass, then load the
+// file named by RUN_MAINTENANCE_IF_MAIN (presumably the runner that
+// instantiates and executes that class -- confirm against Maintenance.php).
+$maintClass = "FindFilesMissingDBRows";
+require_once( RUN_MAINTENANCE_IF_MAIN );
Added: branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php
===================================================================
--- branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php
(rev 0)
+++ branches/wmf/1.19wmf1/maintenance/fixBug35048Files.php 2012-03-21
21:42:16 UTC (rev 114411)
@@ -0,0 +1,148 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+/**
+ * Maintenance script that repairs files whose on-disk archive copy and
+ * image/oldimage rows disagree.
+ *
+ * Input is <indir>/<wiki DB name>, one archive name ("<timestamp>!<name>")
+ * per line. For each entry that has no matching old-version row, the script
+ * compares the sha1 of the current file, the orphaned archived file and the
+ * DB rows, optionally swaps the two files on disk, and fills in empty
+ * oi_archive_name values. Every rename that succeeds is appended to
+ * <outdir>/<wiki DB name> as "<source> <destination>".
+ */
+class FixBug35048Files extends Maintenance {
+	public function __construct() {
+		parent::__construct();
+		$this->mDescription = "Fix the image/oldimage DB records for a file.";
+		$this->addOption( 'indir', "Dir containing orphaned archive file lists", true, true );
+		$this->addOption( 'outdir', "Log changes to files under this dir", true, true );
+	}
+
+	public function execute() {
+		global $wgUploadDirectory, $wgDBname; // assumes FS
+
+		$inDir = $this->getOption( 'indir' );
+		if ( !is_file( "$inDir/$wgDBname" ) ) {
+			$this->error( "$inDir/$wgDBname does not exist", 1 );
+		}
+
+		// Refuse to append to a change log left over from an earlier run.
+		$outDir = $this->getOption( 'outdir' );
+		$logFile = "$outDir/$wgDBname";
+		if ( file_exists( $logFile ) ) {
+			$this->error( "$logFile already exists", 1 );
+		}
+
+		if ( !wfMkdirParents( "$wgUploadDirectory/lost+found" ) || !wfMkdirParents( $outDir ) ) {
+			$this->error( "Could not create '$wgUploadDirectory/lost+found' or '$outDir'." );
+			return;
+		}
+
+		$dbw = wfGetDB( DB_MASTER );
+		$repo = RepoGroup::singleton()->getLocalRepo();
+
+		$list = array_filter( explode( "\n", file_get_contents( "$inDir/$wgDBname" ) ) );
+		foreach ( $list as $archiveName ) {
+			// Archive names look like "<timestamp>!<file name>"; only the name is needed.
+			$parts = explode( '!', $archiveName, 2 );
+			if ( count( $parts ) < 2 || $parts[1] === '' ) {
+				$this->error( "Skipping malformed archive name '$archiveName'." );
+				continue;
+			}
+			$name = $parts[1];
+			$title = Title::makeTitle( NS_FILE, $name );
+
+			$oaFile = $repo->newFromArchiveName( $title, $archiveName );
+			if ( $oaFile && $oaFile->exists() ) {
+				continue; // fixed already
+			}
+
+			$file = $repo->findFile( $title );
+			if ( !$file || !$file->exists() ) {
+				continue;
+			}
+
+			$currentFilePath = "$wgUploadDirectory/" . $file->getRel();
+			$archivedFilePath = "$wgUploadDirectory/" . $file->getArchiveRel( $archiveName );
+			if ( !is_file( $currentFilePath ) || !is_file( $archivedFilePath ) ) {
+				$this->error( "Failed sanity check for file existence for '$name'." );
+				continue;
+			}
+			$archivedFileSha1 = wfBaseConvert( sha1_file( $archivedFilePath ), 16, 36, 31 );
+			$currentFileSha1 = $repo->getFileSha1( $file->getPath() ); // base 36
+
+			$history = $file->getHistory(); // old file versions
+
+			#$this->output( "Checking '$archiveName', sha1 $archivedFileSha1.\n" );
+			$rowSha1 = $file->getSha1();
+			if ( $rowSha1 !== $currentFileSha1 ) { // mismatch?
+				$this->output( "'$name' gives sha1 {$rowSha1}, not $currentFileSha1.\n" );
+			}
+			if (
+				// Current file row is pointing to the wrong file
+				$rowSha1 !== $currentFileSha1 &&
+				// Current file row matches this orphaned file
+				$rowSha1 === $archivedFileSha1
+			) {
+				// We often have two rows where the current row points to the wrong file
+				// and the old row points to none at all. Often, from the sha1s, the orphan
+				// and current files should have their names swapped and the old row should
+				// point to the current file (which we move to an archive name).
+				if ( count( $history ) && $history[0]->getArchiveName() === ''
+					&& $history[0]->getSha1() === $currentFileSha1 )
+				{
+					// Switch the current version and the orphaned version on the FS.
+					$this->output( "Switching '$currentFilePath' with '$archivedFilePath'.\n" );
+					$tmpFilePath = "$wgUploadDirectory/lost+found/" . md5( $currentFilePath );
+					if ( !is_writable( $currentFilePath ) || !is_writable( $archivedFilePath ) ) {
+						$this->error( "Cannot switch files for '$name': not writable." );
+						continue;
+					}
+					// Three-step swap through a temporary name. Stop at the first
+					// failure and log only the moves that really happened, so the
+					// change log can be used to undo a partial swap by hand.
+					$moves = array(
+						array( $currentFilePath, $tmpFilePath ), // temp
+						array( $archivedFilePath, $currentFilePath ),
+						array( $tmpFilePath, $archivedFilePath )
+					);
+					$ok = true;
+					foreach ( $moves as $move ) {
+						list( $src, $dst ) = $move;
+						if ( !rename( $src, $dst ) ) {
+							$this->error( "Failed to rename '$src' to '$dst'; switch of '$name' is incomplete." );
+							$ok = false;
+							break;
+						}
+						file_put_contents( $logFile, "$src $dst\n", FILE_APPEND ); // log changes
+					}
+					if ( !$ok ) {
+						continue; // leave the DB rows alone for a half-switched file
+					}
+					// Update DB to point to former current version next run (in $history loop)...
+				} else {
+					// Evict the current version to lost+found so it can be properly
+					// re-uploaded later, with the username, comment, and log entry.
+					// NOTE: the renames and their log lines below are disabled in this
+					// revision; only the messages are printed.
+					$this->output( "Evicting '$currentFilePath' to '$wgUploadDirectory/lost+found/$name'.\n" );
+					#rename( $currentFilePath, "$wgUploadDirectory/lost+found/" . $file->getName() );
+					// Restore the orphaned archived version to current version file
+					// name so that it matches up with the current version metadata.
+					$this->output( "Moving '$archivedFilePath' back to '$currentFilePath'.\n" );
+					#rename( $archivedFilePath, $currentFilePath );
+					#file_put_contents( "$outDir/$wgDBname", "$currentFilePath $wgUploadDirectory/lost+found/$name\n", FILE_APPEND );
+					#file_put_contents( "$outDir/$wgDBname", "$archivedFilePath $currentFilePath\n", FILE_APPEND );
+				}
+			}
+			// While at it, fix files with empty oi_archive_name but with oi_sha1 pointing
+			// to this file. This can happen if is_file() fails in the File:publish() function.
+			foreach ( $history as $oldFile ) {
+				if ( $oldFile->getArchiveName() === '' && // broken row
+					$oldFile->getSha1() === $archivedFileSha1 ) // should have this name
+				{
+					$this->output( "Fixed empty oi_archive_name via sha1 $archivedFileSha1.\n" );
+					$dbw->update( 'oldimage',
+						array( 'oi_archive_name' => $archiveName ),
+						array(
+							'oi_name'         => $name,
+							'oi_sha1'         => $archivedFileSha1,
+							'oi_timestamp'    => $dbw->timestamp( $oldFile->getTimestamp() ),
+							'oi_archive_name' => '' // sanity
+						),
+						__METHOD__
+					);
+				}
+			}
+		}
+	}
+}
+
+// Record the name of the class defined above in $maintClass, then load the
+// file named by RUN_MAINTENANCE_IF_MAIN (presumably the runner that
+// instantiates and executes that class -- confirm against Maintenance.php).
+$maintClass = "FixBug35048Files";
+require_once( RUN_MAINTENANCE_IF_MAIN );
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs