Aaron Schulz has uploaded a new change for review.
https://gerrit.wikimedia.org/r/106638
Change subject: Optimized copyFileBackend to use MD5 from listing if given
(e.g. Swift)
......................................................................
Optimized copyFileBackend to use MD5 from listing if given (e.g. Swift)
Change-Id: Ic495d6c8c6dffb0b34e6b5eefb8de4d057a51069
---
M maintenance/copyFileBackend.php
1 file changed, 23 insertions(+), 5 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/mediawiki/core
refs/changes/38/106638/1
diff --git a/maintenance/copyFileBackend.php b/maintenance/copyFileBackend.php
index 09b9295..8bacb26 100644
--- a/maintenance/copyFileBackend.php
+++ b/maintenance/copyFileBackend.php
@@ -347,15 +347,33 @@
} else {
$dstStat = $dst->getFileStat( array( 'src' => $dPath )
);
}
- return (
+ // Initial fast checks to see if files are obviously different
+ $sameFast = (
is_array( $srcStat ) // sanity check that source exists
&& is_array( $dstStat ) // dest exists
&& $srcStat['size'] === $dstStat['size']
- && ( !$skipHash || $srcStat['mtime'] <=
$dstStat['mtime'] )
- && ( $skipHash || $src->getFileSha1Base36( array( 'src'
=> $sPath, 'latest' => 1 ) )
- === $dst->getFileSha1Base36( array( 'src' =>
$dPath, 'latest' => 1 ) )
- )
);
+ // More thorough checks against files
+ if ( !$sameFast ) {
+ $same = false; // no need to look farther
+ } elseif ( isset( $srcStat['md5'] ) && isset( $dstStat['md5'] )
) {
+ // If MD5 was already in the stat info, just use it.
+ // This is useful as many object stores can return
this in object listing,
+ // so we can use it to avoid slow per-file HEADs.
+ $same = ( $srcStat['md5'] === $dstStat['md5'] );
+ } elseif ( $skipHash ) {
+ // This mode is good for copying to a backup location
or resyncing clone
+ // backends in FileBackendMultiWrite (since they get
writes second, they have
+ // higher timestamps). However, when copying the other
way, this hits loads of
+ // false positives (possibly 100%) and wastes a bunch
of time on GETs/PUTs.
+ $same = ( $srcStat['mtime'] <= $dstStat['mtime'] );
+ } else {
+ // This is the slowest method which does many per-file
HEADs (unless an object
+ // store tracks SHA-1 in listings).
+ $same = ( $src->getFileSha1Base36( array( 'src' =>
$sPath, 'latest' => 1 ) )
+ === $dst->getFileSha1Base36( array( 'src' =>
$dPath, 'latest' => 1 ) ) );
+ }
+ return $same;
}
}
--
To view, visit https://gerrit.wikimedia.org/r/106638
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic495d6c8c6dffb0b34e6b5eefb8de4d057a51069
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/core
Gerrit-Branch: master
Gerrit-Owner: Aaron Schulz <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits