jenkins-bot has submitted this change and it was merged.

Change subject: Script to fix bug 53687 (failure of revision row deletion)
......................................................................


Script to fix bug 53687 (failure of revision row deletion)

Change-Id: I157a20cb89aa774f188875992d884553f47ea896
---
A bug-53687/find-orphans.sh
A bug-53687/find-orphans.sql
A bug-53687/fixOrphans.php
3 files changed, 171 insertions(+), 0 deletions(-)

Approvals:
  Tim Starling: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/bug-53687/find-orphans.sh b/bug-53687/find-orphans.sh
new file mode 100755
index 0000000..4014db9
--- /dev/null
+++ b/bug-53687/find-orphans.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+# For every database name listed in all.dblist, run find-orphans.sql against
+# the server reported by getSlaveServer.php and store the raw result in
+# orphans/<dbname> next to this script.
+
+# Quote every expansion so that a script path containing whitespace is not
+# word-split.
+dir=$(dirname "$0")
+# mkdir -p is a no-op when the directory already exists.
+mkdir -p "$dir/orphans"
+for db in $(</usr/local/apache/common/all.dblist); do
+       echo "$db"
+       # Resolve the host first and pass it quoted; if the lookup prints
+       # nothing, an unquoted expansion would make -h consume "-N" as the host.
+       host=$(mwscript getSlaveServer.php --wiki="$db")
+       # -N: no column-name header, -B: tab-separated batch output
+       mysql -h "$host" -N -B "$db" < "$dir/find-orphans.sql" > "$dir/orphans/$db"
+done
diff --git a/bug-53687/find-orphans.sql b/bug-53687/find-orphans.sql
new file mode 100644
index 0000000..8727ac3
--- /dev/null
+++ b/bug-53687/find-orphans.sql
@@ -0,0 +1,2 @@
+-- List revision rows whose rev_page matches an `updates` row with
+-- up_action='delete', together with the title recorded in the logging row
+-- that shares the same timestamp, and whether a matching archive row exists.
+-- The seven output columns are consumed positionally by fixOrphans.php, so
+-- their order must not change.
+-- NOTE(review): the log_action='delete' condition in WHERE discards the
+-- NULL-extended rows of the logging LEFT JOIN, so rows without a log entry
+-- are not listed -- confirm that this is intended.
+SELECT
+       up_page,
+       up_timestamp,
+       log_namespace,
+       log_title,
+       rev_id,
+       ar_rev_id IS NOT NULL AS ar_rev_match,
+       rev_text_id = ar_text_id AS ar_text_match
+FROM
+       revision LEFT JOIN archive ON ar_rev_id = rev_id,
+       updates LEFT JOIN logging ON up_timestamp = log_timestamp
+WHERE
+       log_action = 'delete'
+       AND rev_page = up_page
+       AND up_action = 'delete';
+
diff --git a/bug-53687/fixOrphans.php b/bug-53687/fixOrphans.php
new file mode 100644
index 0000000..8ab526b
--- /dev/null
+++ b/bug-53687/fixOrphans.php
@@ -0,0 +1,161 @@
+<?php
+
+require( __DIR__ . '/../WikimediaMaintenance.php' );
+
+/**
+ * Maintenance script for bug 53687 (failure of revision row deletion).
+ *
+ * Reads the tab-separated list produced by find-orphans.sql and, for every
+ * listed revision ID, locks the revision, archive and page rows inside a
+ * transaction and then either removes a duplicate archive row, removes the
+ * orphaned revision row, moves the revision row into the archive table, or
+ * does nothing. With --dry-run the decision is only reported.
+ */
+class FixOrphans extends WikimediaMaintenance {
+       function __construct() {
+               parent::__construct();
+               $this->addArg( 'list-file', 'The list file generated by find-orphans.sql' );
+               $this->addOption( 'dry-run', 'dry run' );
+       }
+
+       /**
+        * Process the list file line by line, one transaction per line.
+        *
+        * Exits with status 1 if the list file cannot be opened. Malformed
+        * lines are reported and skipped.
+        */
+       function execute() {
+               $fileName = $this->getArg( 0 );
+               $f = fopen( $fileName, 'r' );
+               if ( !$f ) {
+                       $this->error( "Unable to open list file \"$fileName\"" );
+                       exit( 1 );
+               }
+               $lineNumber = 0;
+               $dryRun = $this->getOption( 'dry-run' );
+               if ( $dryRun ) {
+                       $this->output( "Dry run mode\n" );
+               }
+               $dbw = wfGetDB( DB_MASTER );
+
+               // Archive field => revision field. Each pair must hold identical
+               // values before a revision row is treated as a duplicate of its
+               // archive row.
+               $verifyPairs = array(
+                       'ar_comment' => 'rev_comment',
+                       'ar_user' => 'rev_user',
+                       'ar_user_text' => 'rev_user_text',
+                       'ar_timestamp' => 'rev_timestamp',
+                       'ar_minor_edit' => 'rev_minor_edit',
+                       'ar_text_id' => 'rev_text_id',
+                       'ar_deleted' => 'rev_deleted',
+                       'ar_len' => 'rev_len',
+                       'ar_page_id' => 'rev_page',
+                       'ar_parent_id' => 'rev_parent_id',
+                       'ar_sha1' => 'rev_sha1',
+               );
+
+               // Columns of the list file, in the order selected by
+               // find-orphans.sql. Built once instead of on every iteration.
+               $columns = array( 'up_page', 'up_timestamp', 'log_namespace',
+                       'log_title', 'rev_id', 'ar_rev_match', 'ar_text_match' );
+               $numColumns = count( $columns );
+
+               while ( !feof( $f ) ) {
+                       $line = fgets( $f );
+                       $lineNumber++;
+                       if ( $line === false ) {
+                               break;
+                       }
+                       $line = rtrim( $line, "\r\n" );
+                       if ( $line === '' ) {
+                               continue;
+                       }
+                       $parts = explode( "\t", $line );
+                       // array_combine() needs exactly as many values as keys, so
+                       // a line with too many fields is as invalid as one with too
+                       // few. Reject both here rather than let array_combine() fail
+                       // and leave $info unusable.
+                       if ( count( $parts ) !== $numColumns ) {
+                               $this->error( "XXX: ERROR Invalid line $lineNumber\n" );
+                               continue;
+                       }
+                       $info = array_combine( $columns, $parts );
+                       $revId = $info['rev_id'];
+
+                       // Lock all rows involved so the decision below is made on
+                       // data that cannot change before the commit.
+                       $dbw->begin();
+                       $revRow = $dbw->selectRow( 'revision', '*', array( 'rev_id' => $revId ),
+                               __METHOD__, array( 'FOR UPDATE' ) );
+                       if ( !$revRow ) {
+                               $this->error( "$revId: ERROR revision row has disappeared!" );
+                               $dbw->commit();
+                               continue;
+                       }
+
+                       $arRow = $dbw->selectRow( 'archive', '*', array( 'ar_rev_id' => $revId ),
+                               __METHOD__, array( 'FOR UPDATE' ) );
+                       $pageRow = $dbw->selectRow( 'page', '*', array( 'page_id' => $revRow->rev_page ),
+                               __METHOD__, array( 'FOR UPDATE' ) );
+
+                       if ( $pageRow ) {
+                               // rev_page is somehow connected to a valid page row
+                               // This probably can't happen, but we want to be extra sure we are not
+                               // deleting live revisions
+                               if ( $arRow ) {
+                                       $this->output( "$revId: page still connected! " .
+                                               "Removing duplicate archive row.\n" );
+                                       $action = 'remove-archive';
+                               } else {
+                                       $this->output( "$revId: seems normal! Taking no action.\n" );
+                                       $action = 'none';
+                               }
+                       } elseif ( $arRow ) {
+                               // Both the revision and archive rows exist
+                               // The revision row is not connected to a page and so is
+                               // unreachable. So assuming both contain the same data, it is
+                               // appropriate to delete the revision row, leaving the archive
+                               // row as the sole means of accessing the text ID
+                               $action = 'remove-revision';
+                               foreach ( $verifyPairs as $arField => $revField ) {
+                                       if ( $arRow->$arField !== $revRow->$revField ) {
+                                               $this->error( "$revId: ERROR mismatch between archive and revision " .
+                                                       "rows in field $arField/$revField" );
+                                               $action = 'none';
+                                               break;
+                                       }
+                               }
+                               if ( $action !== 'none' ) {
+                                       $this->output( "$revId: verified that orphan revision row matches " .
+                                               "existing archive row. Deleting revision row.\n" );
+                               }
+                       } else {
+                               // Only an orphaned revision row exists, so there is no way to access
+                               // the revision via the UI. The assumption is that a deletion failed
+                               // to complete, so we create a valid archive row and delete the invalid
+                               // revision row.
+                               // The list file carries the literal text NULL where the
+                               // query found no value for a column.
+                               if ( $info['log_namespace'] === 'NULL' || $info['log_title'] === 'NULL' ) {
+                                       $this->error( "$revId: ERROR no log row, unable to determine title\n" );
+                                       $action = 'none';
+                               } else {
+                                       $this->output( "$revId: moving orphaned revision row to archive\n" );
+                                       $action = 'move-revision';
+                               }
+                       }
+
+                       if ( $dryRun ) {
+                               // Nothing was written; commit only to release the row locks.
+                               $dbw->commit();
+                               continue;
+                       }
+
+                       if ( $action === 'remove-archive' ) {
+                               $dbw->delete( 'archive', array( 'ar_rev_id' => $revId ), __METHOD__ );
+                       } elseif ( $action === 'remove-revision' ) {
+                               $dbw->delete( 'revision', array( 'rev_id' => $revId ), __METHOD__ );
+                       } elseif ( $action === 'move-revision' ) {
+                               // Title comes from the log row, everything else is copied
+                               // from the revision row being removed.
+                               $dbw->insert( 'archive',
+                                       array(
+                                               'ar_namespace'  => $info['log_namespace'],
+                                               'ar_title'      => $info['log_title'],
+                                               'ar_comment'    => $revRow->rev_comment,
+                                               'ar_user'       => $revRow->rev_user,
+                                               'ar_user_text'  => $revRow->rev_user_text,
+                                               'ar_timestamp'  => $revRow->rev_timestamp,
+                                               'ar_minor_edit' => $revRow->rev_minor_edit,
+                                               'ar_rev_id'     => $revId,
+                                               'ar_parent_id'  => $revRow->rev_parent_id,
+                                               'ar_text_id'    => $revRow->rev_text_id,
+                                               'ar_text'       => '',
+                                               'ar_flags'      => '',
+                                               'ar_len'        => $revRow->rev_len,
+                                               'ar_page_id'    => $revRow->rev_page,
+                                               'ar_deleted'    => $revRow->rev_deleted,
+                                               'ar_sha1'       => $revRow->rev_sha1,
+                                       ),
+                                       __METHOD__ );
+                               $dbw->delete( 'revision', array( 'rev_id' => $revId ), __METHOD__ );
+                       }
+                       $dbw->commit();
+
+                       // Throttle: call wfWaitForSlaves() once per 100 input lines
+                       // (only lines that reach this point are counted).
+                       if ( $lineNumber % 100 == 1 ) {
+                               wfWaitForSlaves();
+                       }
+               }
+
+               // Release the list file handle; it was previously left open.
+               fclose( $f );
+       }
+}
+
+// Name the class defined above in $maintClass, then include the file that
+// the RUN_MAINTENANCE_IF_MAIN constant points to.
+$maintClass = 'FixOrphans';
+require_once RUN_MAINTENANCE_IF_MAIN;
+

-- 
To view, visit https://gerrit.wikimedia.org/r/93645
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I157a20cb89aa774f188875992d884553f47ea896
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/WikimediaMaintenance
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to