Hoo man has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/293445

Change subject: Retry Wikidata dump creation up to three times
......................................................................

Retry Wikidata dump creation up to three times

Related to T137366

Change-Id: I257efd74ff770dbdec0e6856b3be8dfc30b0168d
---
M modules/snapshot/files/cron/dumpwikidatajson.sh
M modules/snapshot/files/cron/dumpwikidatattl.sh
2 files changed, 104 insertions(+), 73 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/45/293445/1

diff --git a/modules/snapshot/files/cron/dumpwikidatajson.sh 
b/modules/snapshot/files/cron/dumpwikidatajson.sh
index f99ab77..231fd26 100644
--- a/modules/snapshot/files/cron/dumpwikidatajson.sh
+++ b/modules/snapshot/files/cron/dumpwikidatajson.sh
@@ -18,59 +18,75 @@
 i=0
 shards=4
 
-rm -f $failureFile
+# Try to create the dump (up to three times).
+retries=0
 
-while [ $i -lt $shards ]; do
-       (
-               set -o pipefail
-               php $multiversionscript 
extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpJson.php --wiki 
wikidatawiki --shard $i --sharding-factor $shards --snippet 2>> 
/var/log/wikidatadump/dumpwikidatajson-$filename-$i.log | gzip > 
$tempDir/wikidataJson.$i.gz
-               if [ $? -gt 0 ]; then
-                       echo 1 > $failureFile
-               fi
-       ) &
-       let i++
-done
-
-wait
-
-if [ -f $failureFile ]; then
-       # Something went wrong, let's clean up and give up for now. Leave logs 
in place.
+while true; do
        rm -f $failureFile
-       rm $tempDir/wikidataJson.*.gz
-else
-       # Open the json list
-       echo '[' | gzip -f > $tempDir/wikidataJson.gz
 
-       i=0
        while [ $i -lt $shards ]; do
-               cat $tempDir/wikidataJson.$i.gz >> $tempDir/wikidataJson.gz
-               rm $tempDir/wikidataJson.$i.gz
+               (
+                       set -o pipefail
+                       php $multiversionscript 
extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpJson.php --wiki 
wikidatawiki --shard $i --sharding-factor $shards --snippet 2>> 
/var/log/wikidatadump/dumpwikidatajson-$filename-$i.log | gzip > 
$tempDir/wikidataJson.$i.gz
+                       if [ $? -gt 0 ]; then
+                               echo 1 > $failureFile
+                       fi
+               ) &
                let i++
-               if [ $i -lt $shards ]; then
-                       # Shards don't end with commas so add commas to 
separate them
-                       echo ',' | gzip -f >> $tempDir/wikidataJson.gz
-               fi
        done
 
-       # Close the json list
-       echo -e '\n]' | gzip -f >> $tempDir/wikidataJson.gz
+       wait
 
-       mv $tempDir/wikidataJson.gz $targetFileGzip
+       if [ -f $failureFile ]; then
+               # Something went wrong, let's clean up and try again (exiting 
after the third failed attempt). Leave logs in place.
+               rm -f $failureFile
+               rm $tempDir/wikidataJson.*.gz
+               let retries++
 
-       # Legacy directory (with legacy naming scheme)
-       legacyDirectory=$publicDir/other/wikidata
-       ln -s "../wikibase/wikidatawiki/$today/$filename.json.gz" 
"$legacyDirectory/$today.json.gz"
-       find $legacyDirectory -name '*.json.gz' -mtime +`expr $daysToKeep + 1` 
-delete
+               if [ $retries -eq 3 ]; then
+                       exit 1
+               fi
 
-       # (Re-)create the link to the latest
-       ln -fs "$today/$filename.json.gz" "$targetDirBase/latest-all.json.gz"
+               # Another attempt
+               continue
+       fi
 
-       # Create the bzip2 from the gzip one and update the latest-all.json.bz2 
link
-       gzip -dc $targetFileGzip | bzip2 -c > $tempDir/wikidataJson.bz2
-       mv $tempDir/wikidataJson.bz2 $targetFileBzip2
-       ln -fs "$today/$filename.json.bz2" "$targetDirBase/latest-all.json.bz2"
+       break
 
-       pruneOldDirectories
-       pruneOldLogs
-       runDcat
-fi
+done
+
+# Open the json list
+echo '[' | gzip -f > $tempDir/wikidataJson.gz
+
+i=0
+while [ $i -lt $shards ]; do
+       cat $tempDir/wikidataJson.$i.gz >> $tempDir/wikidataJson.gz
+       rm $tempDir/wikidataJson.$i.gz
+       let i++
+       if [ $i -lt $shards ]; then
+               # Shards don't end with commas so add commas to separate them
+               echo ',' | gzip -f >> $tempDir/wikidataJson.gz
+       fi
+done
+
+# Close the json list
+echo -e '\n]' | gzip -f >> $tempDir/wikidataJson.gz
+
+mv $tempDir/wikidataJson.gz $targetFileGzip
+
+# Legacy directory (with legacy naming scheme)
+legacyDirectory=$publicDir/other/wikidata
+ln -s "../wikibase/wikidatawiki/$today/$filename.json.gz" 
"$legacyDirectory/$today.json.gz"
+find $legacyDirectory -name '*.json.gz' -mtime +`expr $daysToKeep + 1` -delete
+
+# (Re-)create the link to the latest
+ln -fs "$today/$filename.json.gz" "$targetDirBase/latest-all.json.gz"
+
+# Create the bzip2 from the gzip one and update the latest-all.json.bz2 link
+gzip -dc $targetFileGzip | bzip2 -c > $tempDir/wikidataJson.bz2
+mv $tempDir/wikidataJson.bz2 $targetFileBzip2
+ln -fs "$today/$filename.json.bz2" "$targetDirBase/latest-all.json.bz2"
+
+pruneOldDirectories
+pruneOldLogs
+runDcat
diff --git a/modules/snapshot/files/cron/dumpwikidatattl.sh 
b/modules/snapshot/files/cron/dumpwikidatattl.sh
index 4e8a116..cf832e5 100644
--- a/modules/snapshot/files/cron/dumpwikidatattl.sh
+++ b/modules/snapshot/files/cron/dumpwikidatattl.sh
@@ -17,39 +17,54 @@
 i=0
 shards=4
 
-rm -f $failureFile
-
-while [ $i -lt $shards ]; do
-       (
-               set -o pipefail
-               php $multiversionscript 
extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpRdf.php --wiki 
wikidatawiki --shard $i --sharding-factor $shards --format ttl 2>> 
/var/log/wikidatadump/dumpwikidatattl-$filename-$i.log | gzip > 
$tempDir/wikidataTTL.$i.gz
-               if [ $? -gt 0 ]; then
-                       echo 1 > $failureFile
-               fi
-       ) &
-       let i++
-done
-
-wait
-
-if [ -f $failureFile ]; then
-       # Something went wrong, let's clean up and give up for now. Leave logs 
in place.
+# Try to create the dump (up to three times).
+retries=0
+while true; do
        rm -f $failureFile
-       rm $tempDir/wikidataTTL.*.gz
-else
-       i=0
+
        while [ $i -lt $shards ]; do
-               cat $tempDir/wikidataTTL.$i.gz >> $tempDir/wikidataTtl.gz
-               rm $tempDir/wikidataTTL.$i.gz
+               (
+                       set -o pipefail
+                       php $multiversionscript 
extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpRdf.php --wiki 
wikidatawiki --shard $i --sharding-factor $shards --format ttl 2>> 
/var/log/wikidatadump/dumpwikidatattl-$filename-$i.log | gzip > 
$tempDir/wikidataTTL.$i.gz
+                       if [ $? -gt 0 ]; then
+                               echo 1 > $failureFile
+                       fi
+               ) &
                let i++
        done
 
-       mv $tempDir/wikidataTtl.gz $targetFileGzip
+       wait
 
-       gzip -dc $targetFileGzip | bzip2 -c > $tempDir/wikidataTtl.bz2
-       mv $tempDir/wikidataTtl.bz2 $targetFileBzip2
+       if [ -f $failureFile ]; then
+               # Something went wrong, let's clean up and try again (exiting 
after the third failed attempt). Leave logs in place.
+               rm -f $failureFile
+               rm $tempDir/wikidataTTL.*.gz
+               let retries++
 
-       pruneOldDirectories
-       pruneOldLogs
-       runDcat
-fi
+               if [ $retries -eq 3 ]; then
+                       exit 1
+               fi
+
+               # Another attempt
+               continue
+       fi
+
+       break
+
+done
+
+i=0
+while [ $i -lt $shards ]; do
+       cat $tempDir/wikidataTTL.$i.gz >> $tempDir/wikidataTtl.gz
+       rm $tempDir/wikidataTTL.$i.gz
+       let i++
+done
+
+mv $tempDir/wikidataTtl.gz $targetFileGzip
+
+gzip -dc $targetFileGzip | bzip2 -c > $tempDir/wikidataTtl.bz2
+mv $tempDir/wikidataTtl.bz2 $targetFileBzip2
+
+pruneOldDirectories
+pruneOldLogs
+runDcat

-- 
To view, visit https://gerrit.wikimedia.org/r/293445
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I257efd74ff770dbdec0e6856b3be8dfc30b0168d
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Hoo man <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to