ArielGlenn has submitted this change and it was merged.

Change subject: More error logging/ sanity checks for dumpwikidata
......................................................................
More error logging/ sanity checks for dumpwikidata Not sure this is needed, but seems useful to have. Change-Id: I42c2a51fa4580f4c848fe55c5833017cee79a5e8 --- M modules/snapshot/files/cron/dumpwikidatajson.sh M modules/snapshot/files/cron/dumpwikidatattl.sh 2 files changed, 43 insertions(+), 13 deletions(-) Approvals: Hoo man: Looks good to me, but someone else must approve ArielGlenn: Looks good to me, approved jenkins-bot: Verified diff --git a/modules/snapshot/files/cron/dumpwikidatajson.sh b/modules/snapshot/files/cron/dumpwikidatajson.sh index d3bfa61..989f5a3 100644 --- a/modules/snapshot/files/cron/dumpwikidatajson.sh +++ b/modules/snapshot/files/cron/dumpwikidatajson.sh @@ -14,21 +14,25 @@ targetFileGzip=$targetDir/$filename.json.gz targetFileBzip2=$targetDir/$filename.json.bz2 failureFile=/tmp/dumpwikidatajson-failure +mainLogFile=/var/log/wikidatadump/dumpwikidatajson-$filename-main.log -i=0 shards=4 # Try to create the dump (up to three times). retries=0 while true; do + i=0 rm -f $failureFile while [ $i -lt $shards ]; do ( set -o pipefail - php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpJson.php --wiki wikidatawiki --shard $i --sharding-factor $shards --snippet 2>> /var/log/wikidatadump/dumpwikidatajson-$filename-$i.log | gzip > $tempDir/wikidataJson.$i.gz - if [ $? -gt 0 ]; then + errorLog=/var/log/wikidatadump/dumpwikidatajson-$filename-$i.log + php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpJson.php --wiki wikidatawiki --shard $i --sharding-factor $shards --snippet 2>> $errorLog | gzip > $tempDir/wikidataJson.$i.gz + exitCode=$? + if [ $exitCode -gt 0 ]; then + echo -e "\n\nProcess failed with exit code $exitCode" >> $errorLog echo 1 > $failureFile fi ) & @@ -40,8 +44,9 @@ if [ -f $failureFile ]; then # Something went wrong, let's clean up and maybe retry. Leave logs in place. 
rm -f $failureFile - rm $tempDir/wikidataJson.*.gz + rm -f $tempDir/wikidataJson.*.gz let retries++ + echo "Dumping one or more shards failed. Retrying." >> $mainLogFile if [ $retries -eq 3 ]; then exit 1 @@ -60,8 +65,18 @@ i=0 while [ $i -lt $shards ]; do - cat $tempDir/wikidataJson.$i.gz >> $tempDir/wikidataJson.gz - rm $tempDir/wikidataJson.$i.gz + tempFile=$tempDir/wikidataJson.$i.gz + if [ ! -f $tempFile ]; then + echo "$tempFile does not exist. Aborting." >> $mainLogFile + exit 1 + fi + fileSize=`stat --printf="%s" $tempFile` + if [ $fileSize -lt 1800000000 ]; then + echo "File size of $tempFile is only $fileSize. Aborting." >> $mainLogFile + exit 1 + fi + cat $tempFile >> $tempDir/wikidataJson.gz + rm $tempFile let i++ if [ $i -lt $shards ]; then # Shards don't end with commas so add commas to separate them diff --git a/modules/snapshot/files/cron/dumpwikidatattl.sh b/modules/snapshot/files/cron/dumpwikidatattl.sh index 3474ebc..1383bf0 100644 --- a/modules/snapshot/files/cron/dumpwikidatattl.sh +++ b/modules/snapshot/files/cron/dumpwikidatattl.sh @@ -2,7 +2,7 @@ ############################################################# # This file is maintained by puppet! # puppet:///modules/snapshot/cron/dumpwikidatattl.sh -############################################################## +############################################################# # # Generate a json dump for Wikidata and remove old ones. # @@ -14,21 +14,25 @@ targetFileGzip=$targetDir/$filename.ttl.gz targetFileBzip2=$targetDir/$filename.ttl.bz2 failureFile=/tmp/dumpwikidatattl-failure +mainLogFile=/var/log/wikidatadump/dumpwikidatattl-$filename-main.log -i=0 shards=4 # Try to create the dump (up to three times). 
retries=0 while true; do + i=0 rm -f $failureFile while [ $i -lt $shards ]; do ( set -o pipefail - php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpRdf.php --wiki wikidatawiki --shard $i --sharding-factor $shards --format ttl 2>> /var/log/wikidatadump/dumpwikidatattl-$filename-$i.log | gzip > $tempDir/wikidataTTL.$i.gz - if [ $? -gt 0 ]; then + errorLog=/var/log/wikidatadump/dumpwikidatattl-$filename-$i.log + php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpRdf.php --wiki wikidatawiki --shard $i --sharding-factor $shards --format ttl 2>> $errorLog | gzip > $tempDir/wikidataTTL.$i.gz + exitCode=$? + if [ $exitCode -gt 0 ]; then + echo -e "\n\nProcess failed with exit code $exitCode" >> $errorLog echo 1 > $failureFile fi ) & @@ -40,8 +44,9 @@ if [ -f $failureFile ]; then # Something went wrong, let's clean up and maybe retry. Leave logs in place. rm -f $failureFile - rm $tempDir/wikidataTTL.*.gz + rm -f $tempDir/wikidataTTL.*.gz let retries++ + echo "Dumping one or more shards failed. Retrying." >> $mainLogFile if [ $retries -eq 3 ]; then exit 1 @@ -57,8 +62,18 @@ i=0 while [ $i -lt $shards ]; do - cat $tempDir/wikidataTTL.$i.gz >> $tempDir/wikidataTtl.gz - rm $tempDir/wikidataTTL.$i.gz + tempFile=$tempDir/wikidataTTL.$i.gz + if [ ! -f $tempFile ]; then + echo "$tempFile does not exist. Aborting." >> $mainLogFile + exit 1 + fi + fileSize=`stat --printf="%s" $tempFile` + if [ $fileSize -lt 1800000000 ]; then + echo "File size of $tempFile is only $fileSize. Aborting." 
>> $mainLogFile + exit 1 + fi + cat $tempFile >> $tempDir/wikidataTtl.gz + rm $tempFile let i++ done -- To view, visit https://gerrit.wikimedia.org/r/311551 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I42c2a51fa4580f4c848fe55c5833017cee79a5e8 Gerrit-PatchSet: 6 Gerrit-Project: operations/puppet Gerrit-Branch: production Gerrit-Owner: Hoo man <h...@online.de> Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org> Gerrit-Reviewer: Aude <aude.w...@gmail.com> Gerrit-Reviewer: Hoo man <h...@online.de> Gerrit-Reviewer: jenkins-bot <> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits