Hoo man has uploaded a new change for review.
https://gerrit.wikimedia.org/r/293445
Change subject: Retry Wikidata dump creation up to three times
......................................................................
Retry Wikidata dump creation up to three times
Related to T137366
Change-Id: I257efd74ff770dbdec0e6856b3be8dfc30b0168d
---
M modules/snapshot/files/cron/dumpwikidatajson.sh
M modules/snapshot/files/cron/dumpwikidatattl.sh
2 files changed, 104 insertions(+), 73 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/operations/puppet refs/changes/45/293445/1
diff --git a/modules/snapshot/files/cron/dumpwikidatajson.sh b/modules/snapshot/files/cron/dumpwikidatajson.sh
index f99ab77..231fd26 100644
--- a/modules/snapshot/files/cron/dumpwikidatajson.sh
+++ b/modules/snapshot/files/cron/dumpwikidatajson.sh
@@ -18,59 +18,75 @@
i=0
shards=4
-rm -f $failureFile
+# Try to create the dump (up to three times).
+retries=0
-while [ $i -lt $shards ]; do
- (
- set -o pipefail
- php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpJson.php --wiki wikidatawiki --shard $i --sharding-factor $shards --snippet 2>> /var/log/wikidatadump/dumpwikidatajson-$filename-$i.log | gzip > $tempDir/wikidataJson.$i.gz
- if [ $? -gt 0 ]; then
- echo 1 > $failureFile
- fi
- ) &
- let i++
-done
-
-wait
-
-if [ -f $failureFile ]; then
- # Something went wrong, let's clean up and give up for now. Leave logs in place.
+while true; do
rm -f $failureFile
- rm $tempDir/wikidataJson.*.gz
-else
- # Open the json list
- echo '[' | gzip -f > $tempDir/wikidataJson.gz
- i=0
while [ $i -lt $shards ]; do
- cat $tempDir/wikidataJson.$i.gz >> $tempDir/wikidataJson.gz
- rm $tempDir/wikidataJson.$i.gz
+ (
+ set -o pipefail
+ php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpJson.php --wiki wikidatawiki --shard $i --sharding-factor $shards --snippet 2>> /var/log/wikidatadump/dumpwikidatajson-$filename-$i.log | gzip > $tempDir/wikidataJson.$i.gz
+ if [ $? -gt 0 ]; then
+ echo 1 > $failureFile
+ fi
+ ) &
let i++
- if [ $i -lt $shards ]; then
- # Shards don't end with commas so add commas to separate them
- echo ',' | gzip -f >> $tempDir/wikidataJson.gz
- fi
done
- # Close the json list
- echo -e '\n]' | gzip -f >> $tempDir/wikidataJson.gz
+ wait
- mv $tempDir/wikidataJson.gz $targetFileGzip
+ if [ -f $failureFile ]; then
+ # Something went wrong, let's clean up and give up for now. Leave logs in place.
+ rm -f $failureFile
+ rm $tempDir/wikidataJson.*.gz
+ let retries++
- # Legacy directory (with legacy naming scheme)
- legacyDirectory=$publicDir/other/wikidata
- ln -s "../wikibase/wikidatawiki/$today/$filename.json.gz" "$legacyDirectory/$today.json.gz"
- find $legacyDirectory -name '*.json.gz' -mtime +`expr $daysToKeep + 1` -delete
+ if [ $retries -eq 3 ]; then
+ exit 1
+ fi
- # (Re-)create the link to the latest
- ln -fs "$today/$filename.json.gz" "$targetDirBase/latest-all.json.gz"
+ # Another attempt
+ continue
+ fi
- # Create the bzip2 from the gzip one and update the latest-all.json.bz2 link
- gzip -dc $targetFileGzip | bzip2 -c > $tempDir/wikidataJson.bz2
- mv $tempDir/wikidataJson.bz2 $targetFileBzip2
- ln -fs "$today/$filename.json.bz2" "$targetDirBase/latest-all.json.bz2"
+ break
- pruneOldDirectories
- pruneOldLogs
- runDcat
-fi
+done
+
+# Open the json list
+echo '[' | gzip -f > $tempDir/wikidataJson.gz
+
+i=0
+while [ $i -lt $shards ]; do
+ cat $tempDir/wikidataJson.$i.gz >> $tempDir/wikidataJson.gz
+ rm $tempDir/wikidataJson.$i.gz
+ let i++
+ if [ $i -lt $shards ]; then
+ # Shards don't end with commas so add commas to separate them
+ echo ',' | gzip -f >> $tempDir/wikidataJson.gz
+ fi
+done
+
+# Close the json list
+echo -e '\n]' | gzip -f >> $tempDir/wikidataJson.gz
+
+mv $tempDir/wikidataJson.gz $targetFileGzip
+
+# Legacy directory (with legacy naming scheme)
+legacyDirectory=$publicDir/other/wikidata
+ln -s "../wikibase/wikidatawiki/$today/$filename.json.gz" "$legacyDirectory/$today.json.gz"
+find $legacyDirectory -name '*.json.gz' -mtime +`expr $daysToKeep + 1` -delete
+
+# (Re-)create the link to the latest
+ln -fs "$today/$filename.json.gz" "$targetDirBase/latest-all.json.gz"
+
+# Create the bzip2 from the gzip one and update the latest-all.json.bz2 link
+gzip -dc $targetFileGzip | bzip2 -c > $tempDir/wikidataJson.bz2
+mv $tempDir/wikidataJson.bz2 $targetFileBzip2
+ln -fs "$today/$filename.json.bz2" "$targetDirBase/latest-all.json.bz2"
+
+pruneOldDirectories
+pruneOldLogs
+runDcat
diff --git a/modules/snapshot/files/cron/dumpwikidatattl.sh b/modules/snapshot/files/cron/dumpwikidatattl.sh
index 4e8a116..cf832e5 100644
--- a/modules/snapshot/files/cron/dumpwikidatattl.sh
+++ b/modules/snapshot/files/cron/dumpwikidatattl.sh
@@ -17,39 +17,54 @@
i=0
shards=4
-rm -f $failureFile
-
-while [ $i -lt $shards ]; do
- (
- set -o pipefail
- php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpRdf.php --wiki wikidatawiki --shard $i --sharding-factor $shards --format ttl 2>> /var/log/wikidatadump/dumpwikidatattl-$filename-$i.log | gzip > $tempDir/wikidataTTL.$i.gz
- if [ $? -gt 0 ]; then
- echo 1 > $failureFile
- fi
- ) &
- let i++
-done
-
-wait
-
-if [ -f $failureFile ]; then
- # Something went wrong, let's clean up and give up for now. Leave logs in place.
+# Try to create the dump (up to three times).
+retries=0
+while true; do
rm -f $failureFile
- rm $tempDir/wikidataTTL.*.gz
-else
- i=0
+
while [ $i -lt $shards ]; do
- cat $tempDir/wikidataTTL.$i.gz >> $tempDir/wikidataTtl.gz
- rm $tempDir/wikidataTTL.$i.gz
+ (
+ set -o pipefail
+ php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpRdf.php --wiki wikidatawiki --shard $i --sharding-factor $shards --format ttl 2>> /var/log/wikidatadump/dumpwikidatattl-$filename-$i.log | gzip > $tempDir/wikidataTTL.$i.gz
+ if [ $? -gt 0 ]; then
+ echo 1 > $failureFile
+ fi
+ ) &
let i++
done
- mv $tempDir/wikidataTtl.gz $targetFileGzip
+ wait
- gzip -dc $targetFileGzip | bzip2 -c > $tempDir/wikidataTtl.bz2
- mv $tempDir/wikidataTtl.bz2 $targetFileBzip2
+ if [ -f $failureFile ]; then
+ # Something went wrong, let's clean up and give up for now. Leave logs in place.
+ rm -f $failureFile
+ rm $tempDir/wikidataTTL.*.gz
+ let retries++
- pruneOldDirectories
- pruneOldLogs
- runDcat
-fi
+ if [ $retries -eq 3 ]; then
+ exit 1
+ fi
+
+ # Another attempt
+ continue
+ fi
+
+ break
+
+done
+
+i=0
+while [ $i -lt $shards ]; do
+ cat $tempDir/wikidataTTL.$i.gz >> $tempDir/wikidataTtl.gz
+ rm $tempDir/wikidataTTL.$i.gz
+ let i++
+done
+
+mv $tempDir/wikidataTtl.gz $targetFileGzip
+
+gzip -dc $targetFileGzip | bzip2 -c > $tempDir/wikidataTtl.bz2
+mv $tempDir/wikidataTtl.bz2 $targetFileBzip2
+
+pruneOldDirectories
+pruneOldLogs
+runDcat
--
To view, visit https://gerrit.wikimedia.org/r/293445
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I257efd74ff770dbdec0e6856b3be8dfc30b0168d
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Hoo man <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits