EBernhardson has uploaded a new change for review.
https://gerrit.wikimedia.org/r/204212
Change subject: Final tweaks to repair process
......................................................................
Final tweaks to repair process
Change-Id: Iebbf84c402be40cba4f33b90f21f86b184eaed6f
---
M maintenance/repair_missing_revision_content.php
1 file changed, 16 insertions(+), 6 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/extensions/Flow refs/changes/12/204212/1
diff --git a/maintenance/repair_missing_revision_content.php b/maintenance/repair_missing_revision_content.php
index f556d59..90bf6db 100644
--- a/maintenance/repair_missing_revision_content.php
+++ b/maintenance/repair_missing_revision_content.php
@@ -24,7 +24,7 @@
if ( !$csvOutput ) {
die( "Could not open results file\n" );
}
-fputcsv( $csvOutput, array( "uuid", "esurl" ) );
+fputcsv( $csvOutput, array( "uuid", "esurl", "flags" ) );
$it = new EchoBatchRowIterator(
Flow\Container::get( 'db.factory' )->getDB( DB_SLAVE ),
@@ -91,10 +91,19 @@
// find any gaps in ES within this area
$matches = $lengths = array();
$invalid = false;
+
+ $flags = 'utf-8,gzip,external';
+ if ( in_array( $changeType, $plaintextChangeTypes ) ) {
+ $flags .= ',wikitext';
+ } else {
+ $flags .= ',html';
+ }
echo "Expected length: " . $rev->rev_content_length . "\n";
foreach ( array_keys( $esIdsForCluster ) as $cluster ) {
sort( $esIdsForCluster[$cluster] );
$lastId = reset( $esIdsForCluster[$cluster] );
+
+
foreach ( $esIdsForCluster[$cluster] as $id ) {
if ( $id === $lastId || $id === $lastId + 1 ) {
$lastId = $id;
@@ -138,7 +147,7 @@
}
}
if ( $doAppend ) {
- $matches[] = array( $url, $content, md5( $content ) );
+ $matches[] = array( $url, $content, $flags );
}
} else {
$lengths[] = $len;
@@ -156,16 +165,16 @@
var_dump( $lengths );
++$totalNoMatch;
} elseif ( count( $matches ) === 1 ) {
- list( $url, $content ) = reset( $matches );
+ list( $url, $content, $flags ) = reset( $matches );
echo "SINGLE DIRECT MATCH: $url : " . truncate( $content, 1024 ) . "\n";
++$totalCompleteMatch;
- fputcsv( $csvOutput, array( $uuid->getAlphadecimal(), $url ) );
+ fputcsv( $csvOutput, array( $uuid->getAlphadecimal(), $url, $flags ) );
} else {
echo "MULTIPLE POTENTIAL MATCHES:\n";
++$totalMultipleMatches;
$multipleMatches[$uuid->getAlphadecimal()] = $matches;
foreach ( $matches as $match ) {
- list( $url, $content ) = $match;
+ list( $url, $content, $flags ) = $match;
echo "\t$url : " . truncate( $content, 1024 ) . "\n";
}
}
@@ -230,7 +239,8 @@
echo "declare victory!\n";
foreach ( array_keys( $group ) as $uuid ) {
$match = array_shift( $expectedMatches );
- fputcsv( $csvOutput, array( $uuid, $match[0] ) );
+ list( $url, $content, $flags ) = $match;
+ fputcsv( $csvOutput, array( $uuid, $url, $flags ) );
--$totalMultipleMatches;
++$totalResolvedMultipleMatches;
}
--
To view, visit https://gerrit.wikimedia.org/r/204212
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Iebbf84c402be40cba4f33b90f21f86b184eaed6f
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/extensions/Flow
Gerrit-Branch: master
Gerrit-Owner: EBernhardson <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits