jenkins-bot has submitted this change and it was merged.
Change subject: Script to fix bug 53687 (failure of revision row deletion)
......................................................................
Script to fix bug 53687 (failure of revision row deletion)
Change-Id: I157a20cb89aa774f188875992d884553f47ea896
---
A bug-53687/find-orphans.sh
A bug-53687/find-orphans.sql
A bug-53687/fixOrphans.php
3 files changed, 171 insertions(+), 0 deletions(-)
Approvals:
Tim Starling: Looks good to me, approved
jenkins-bot: Verified
diff --git a/bug-53687/find-orphans.sh b/bug-53687/find-orphans.sh
new file mode 100755
index 0000000..4014db9
--- /dev/null
+++ b/bug-53687/find-orphans.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# For every wiki listed in all.dblist, run find-orphans.sql against the server
+# reported by getSlaveServer.php and save the query output to
+# orphans/<dbname> next to this script.
+
+dir=$(dirname "$0")
+mkdir -p "$dir/orphans"
+for db in $(</usr/local/apache/common/all.dblist); do
+	echo "$db"
+	host=$(mwscript getSlaveServer.php --wiki="$db")
+	if [ -z "$host" ]; then
+		# No server name came back; skip rather than let mysql pick a default.
+		echo "$db: unable to determine slave server, skipping" >&2
+		continue
+	fi
+	mysql -h "$host" -N -B "$db" < "$dir/find-orphans.sql" > "$dir/orphans/$db"
+done
diff --git a/bug-53687/find-orphans.sql b/bug-53687/find-orphans.sql
new file mode 100644
index 0000000..8727ac3
--- /dev/null
+++ b/bug-53687/find-orphans.sql
@@ -0,0 +1,22 @@
+-- For every updates row with up_action = 'delete', list the revision rows
+-- whose rev_page equals its up_page, with the namespace and title of the
+-- logging row sharing its timestamp, and two flags: whether an archive row
+-- exists for the revision ID and whether that row has the same text ID.
+-- NOTE(review): filtering on log_action in WHERE drops rows without a logging
+-- match, so the LEFT JOIN to logging behaves as an inner join; confirm intent.
+SELECT
+	up_page,
+	up_timestamp,
+	log_namespace,
+	log_title,
+	rev_id,
+	ar_rev_id IS NOT NULL AS ar_rev_match,
+	rev_text_id = ar_text_id AS ar_text_match
+FROM revision
+	LEFT JOIN archive ON ar_rev_id = rev_id,
+	updates
+	LEFT JOIN logging ON up_timestamp = log_timestamp
+WHERE log_action = 'delete'
+	AND rev_page = up_page
+	AND up_action = 'delete';
+
diff --git a/bug-53687/fixOrphans.php b/bug-53687/fixOrphans.php
new file mode 100644
index 0000000..8ab526b
--- /dev/null
+++ b/bug-53687/fixOrphans.php
@@ -0,0 +1,214 @@
+<?php
+
+require( __DIR__ . '/../WikimediaMaintenance.php' );
+
+/**
+ * Maintenance script for bug 53687 (failure of revision row deletion).
+ *
+ * Reads the list file generated by find-orphans.sql and, for each listed
+ * revision, looks at the current revision, archive and page rows to decide
+ * whether to delete a duplicate archive row, delete the orphaned revision
+ * row, move the revision row into the archive table, or change nothing.
+ */
+class FixOrphans extends WikimediaMaintenance {
+	/**
+	 * Names of the tab-separated fields on each list-file line, in order.
+	 */
+	private static $listFields = array(
+		'up_page', 'up_timestamp', 'log_namespace', 'log_title',
+		'rev_id', 'ar_rev_match', 'ar_text_match',
+	);
+
+	/**
+	 * Archive field => revision field pairs that must hold identical values
+	 * before an orphaned revision row is deleted in favour of its archive row.
+	 */
+	private static $verifyPairs = array(
+		'ar_comment' => 'rev_comment',
+		'ar_user' => 'rev_user',
+		'ar_user_text' => 'rev_user_text',
+		'ar_timestamp' => 'rev_timestamp',
+		'ar_minor_edit' => 'rev_minor_edit',
+		'ar_text_id' => 'rev_text_id',
+		'ar_deleted' => 'rev_deleted',
+		'ar_len' => 'rev_len',
+		'ar_page_id' => 'rev_page',
+		'ar_parent_id' => 'rev_parent_id',
+		'ar_sha1' => 'rev_sha1',
+	);
+
+	public function __construct() {
+		parent::__construct();
+		$this->addArg( 'list-file', 'The list file generated by find-orphans.sql' );
+		$this->addOption( 'dry-run', 'dry run' );
+	}
+
+	/**
+	 * Process the list file one line at a time, handling each revision in
+	 * its own transaction. With --dry-run the decisions are reported but no
+	 * rows are inserted or deleted.
+	 */
+	public function execute() {
+		$fileName = $this->getArg( 0 );
+		$f = fopen( $fileName, 'r' );
+		if ( !$f ) {
+			$this->error( "Unable to open list file \"$fileName\"" );
+			exit( 1 );
+		}
+		$dryRun = $this->getOption( 'dry-run' );
+		if ( $dryRun ) {
+			$this->output( "Dry run mode\n" );
+		}
+		$dbw = wfGetDB( DB_MASTER );
+
+		$lineNumber = 0;
+		$numWrites = 0;
+		while ( ( $line = fgets( $f ) ) !== false ) {
+			$lineNumber++;
+			$line = rtrim( $line, "\r\n" );
+			if ( $line === '' ) {
+				continue;
+			}
+			$parts = explode( "\t", $line );
+			// array_combine() needs exactly one value per field name, so
+			// reject lines with too many fields as well as too few.
+			if ( count( $parts ) !== count( self::$listFields ) ) {
+				$this->error( "XXX: ERROR Invalid line $lineNumber\n" );
+				continue;
+			}
+			$info = array_combine( self::$listFields, $parts );
+			$revId = $info['rev_id'];
+
+			// Select with FOR UPDATE so the rows the decision is based on stay locked until commit.
+			$dbw->begin();
+			$revRow = $dbw->selectRow( 'revision', '*', array( 'rev_id' => $revId ),
+				__METHOD__, array( 'FOR UPDATE' ) );
+			if ( !$revRow ) {
+				$this->error( "$revId: ERROR revision row has disappeared!" );
+				$dbw->commit();
+				continue;
+			}
+			$arRow = $dbw->selectRow( 'archive', '*', array( 'ar_rev_id' => $revId ),
+				__METHOD__, array( 'FOR UPDATE' ) );
+			$pageRow = $dbw->selectRow( 'page', '*', array( 'page_id' => $revRow->rev_page ),
+				__METHOD__, array( 'FOR UPDATE' ) );
+
+			$action = $this->chooseAction( $info, $revRow, $arRow, $pageRow );
+			$doWrite = !$dryRun && $action !== 'none';
+			if ( $doWrite ) {
+				$this->applyAction( $dbw, $action, $info, $revRow );
+			}
+			$dbw->commit();
+
+			// Throttle on committed writes rather than on the line number, so
+			// skipped lines cannot postpone the replication wait.
+			if ( $doWrite && ++$numWrites % 100 === 0 ) {
+				wfWaitForSlaves();
+			}
+		}
+		fclose( $f );
+	}
+
+	/**
+	 * Decide what to do with one listed revision and report the decision.
+	 *
+	 * @param array $info List-file fields keyed by the names in self::$listFields
+	 * @param object $revRow Current revision row
+	 * @param object|bool $arRow Archive row with the same revision ID, or a false value if none
+	 * @param object|bool $pageRow Page row for rev_page, or a false value if none
+	 * @return string 'remove-archive', 'remove-revision', 'move-revision' or 'none'
+	 */
+	private function chooseAction( array $info, $revRow, $arRow, $pageRow ) {
+		$revId = $info['rev_id'];
+
+		if ( $pageRow ) {
+			// rev_page is somehow connected to a valid page row. This probably
+			// can't happen, but we want to be extra sure we are not deleting
+			// live revisions.
+			if ( $arRow ) {
+				$this->output( "$revId: page still connected! " .
+					"Removing duplicate archive row.\n" );
+				return 'remove-archive';
+			}
+			$this->output( "$revId: seems normal! Taking no action.\n" );
+			return 'none';
+		}
+
+		if ( $arRow ) {
+			// Both the revision and archive rows exist. The revision row is not
+			// connected to a page and so is unreachable. Provided both contain
+			// the same data, it is appropriate to delete the revision row,
+			// leaving the archive row as the sole means of accessing the text ID.
+			foreach ( self::$verifyPairs as $arField => $revField ) {
+				if ( $arRow->$arField !== $revRow->$revField ) {
+					$this->error( "$revId: ERROR mismatch between archive and revision " .
+						"rows in field $arField/$revField" );
+					return 'none';
+				}
+			}
+			$this->output( "$revId: verified that orphan revision row matches " .
+				"existing archive row. Deleting revision row.\n" );
+			return 'remove-revision';
+		}
+
+		// Only an orphaned revision row exists, so there is no way to access the
+		// revision via the UI. The assumption is that a deletion failed to
+		// complete, so we create a valid archive row and delete the invalid
+		// revision row. The archive row needs the title from the log entry; a
+		// field holding the literal string "NULL" is treated as a missing log row.
+		if ( $info['log_namespace'] === 'NULL' || $info['log_title'] === 'NULL' ) {
+			$this->error( "$revId: ERROR no log row, unable to determine title\n" );
+			return 'none';
+		}
+		$this->output( "$revId: moving orphaned revision row to archive\n" );
+		return 'move-revision';
+	}
+
+	/**
+	 * Carry out an action chosen by chooseAction().
+	 *
+	 * @param object $dbw Connection returned by wfGetDB( DB_MASTER )
+	 * @param string $action 'remove-archive', 'remove-revision' or 'move-revision'
+	 * @param array $info List-file fields keyed by the names in self::$listFields
+	 * @param object $revRow Revision row being repaired
+	 */
+	private function applyAction( $dbw, $action, array $info, $revRow ) {
+		$revId = $info['rev_id'];
+
+		if ( $action === 'remove-archive' ) {
+			$dbw->delete( 'archive', array( 'ar_rev_id' => $revId ), __METHOD__ );
+			return;
+		}
+
+		if ( $action === 'move-revision' ) {
+			$dbw->insert( 'archive',
+				array(
+					'ar_namespace' => $info['log_namespace'],
+					'ar_title' => $info['log_title'],
+					'ar_comment' => $revRow->rev_comment,
+					'ar_user' => $revRow->rev_user,
+					'ar_user_text' => $revRow->rev_user_text,
+					'ar_timestamp' => $revRow->rev_timestamp,
+					'ar_minor_edit' => $revRow->rev_minor_edit,
+					'ar_rev_id' => $revId,
+					'ar_parent_id' => $revRow->rev_parent_id,
+					'ar_text_id' => $revRow->rev_text_id,
+					'ar_text' => '',
+					'ar_flags' => '',
+					'ar_len' => $revRow->rev_len,
+					'ar_page_id' => $revRow->rev_page,
+					'ar_deleted' => $revRow->rev_deleted,
+					'ar_sha1' => $revRow->rev_sha1,
+				),
+				__METHOD__ );
+		}
+
+		if ( $action === 'move-revision' || $action === 'remove-revision' ) {
+			$dbw->delete( 'revision', array( 'rev_id' => $revId ), __METHOD__ );
+		}
+	}
+}
+
+$maintClass = 'FixOrphans';
+require_once( RUN_MAINTENANCE_IF_MAIN );
+
--
To view, visit https://gerrit.wikimedia.org/r/93645
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I157a20cb89aa774f188875992d884553f47ea896
Gerrit-PatchSet: 2
Gerrit-Project: mediawiki/extensions/WikimediaMaintenance
Gerrit-Branch: master
Gerrit-Owner: Tim Starling <[email protected]>
Gerrit-Reviewer: Tim Starling <[email protected]>
Gerrit-Reviewer: jenkins-bot
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits