ArielGlenn has submitted this change and it was merged.

Change subject: More error logging/ sanity checks for dumpwikidata
......................................................................


More error logging/ sanity checks for dumpwikidata

Not sure this is needed, but seems useful to have.

Change-Id: I42c2a51fa4580f4c848fe55c5833017cee79a5e8
---
M modules/snapshot/files/cron/dumpwikidatajson.sh
M modules/snapshot/files/cron/dumpwikidatattl.sh
2 files changed, 43 insertions(+), 13 deletions(-)

Approvals:
  Hoo man: Looks good to me, but someone else must approve
  ArielGlenn: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/modules/snapshot/files/cron/dumpwikidatajson.sh b/modules/snapshot/files/cron/dumpwikidatajson.sh
index d3bfa61..989f5a3 100644
--- a/modules/snapshot/files/cron/dumpwikidatajson.sh
+++ b/modules/snapshot/files/cron/dumpwikidatajson.sh
@@ -14,21 +14,25 @@
 targetFileGzip=$targetDir/$filename.json.gz
 targetFileBzip2=$targetDir/$filename.json.bz2
 failureFile=/tmp/dumpwikidatajson-failure
+mainLogFile=/var/log/wikidatadump/dumpwikidatajson-$filename-main.log
 
-i=0
 shards=4
 
 # Try to create the dump (up to three times).
 retries=0
 
 while true; do
+       i=0
        rm -f $failureFile
 
        while [ $i -lt $shards ]; do
                (
                        set -o pipefail
-                       php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpJson.php --wiki wikidatawiki --shard $i --sharding-factor $shards --snippet 2>> /var/log/wikidatadump/dumpwikidatajson-$filename-$i.log | gzip > $tempDir/wikidataJson.$i.gz
-                       if [ $? -gt 0 ]; then
+                       errorLog=/var/log/wikidatadump/dumpwikidatajson-$filename-$i.log
+                       php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpJson.php --wiki wikidatawiki --shard $i --sharding-factor $shards --snippet 2>> $errorLog | gzip > $tempDir/wikidataJson.$i.gz
+                       exitCode=$?
+                       if [ $exitCode -gt 0 ]; then
+                               echo -e "\n\nProcess failed with exit code $exitCode" >> $errorLog
                                echo 1 > $failureFile
                        fi
                ) &
@@ -40,8 +44,9 @@
        if [ -f $failureFile ]; then
                 # Something went wrong, let's clean up and maybe retry. Leave logs in place.
                rm -f $failureFile
-               rm $tempDir/wikidataJson.*.gz
+               rm -f $tempDir/wikidataJson.*.gz
                let retries++
+               echo "Dumping one or more shards failed. Retrying." >> $mainLogFile
 
                if [ $retries -eq 3 ]; then
                        exit 1
@@ -60,8 +65,18 @@
 
 i=0
 while [ $i -lt $shards ]; do
-       cat $tempDir/wikidataJson.$i.gz >> $tempDir/wikidataJson.gz
-       rm $tempDir/wikidataJson.$i.gz
+       tempFile=$tempDir/wikidataJson.$i.gz
+       if [ ! -f $tempFile ]; then
+               echo "$tempFile does not exist. Aborting." >> $mainLogFile
+               exit 1
+       fi
+       fileSize=`stat --printf="%s" $tempFile`
+       if [ $fileSize -lt 1800000000 ]; then
+               echo "File size of $tempFile is only $fileSize. Aborting." >> $mainLogFile
+               exit 1
+       fi
+       cat $tempFile >> $tempDir/wikidataJson.gz
+       rm $tempFile
        let i++
        if [ $i -lt $shards ]; then
                # Shards don't end with commas so add commas to separate them
diff --git a/modules/snapshot/files/cron/dumpwikidatattl.sh b/modules/snapshot/files/cron/dumpwikidatattl.sh
index 3474ebc..1383bf0 100644
--- a/modules/snapshot/files/cron/dumpwikidatattl.sh
+++ b/modules/snapshot/files/cron/dumpwikidatattl.sh
@@ -2,7 +2,7 @@
 #############################################################
 # This file is maintained by puppet!
 # puppet:///modules/snapshot/cron/dumpwikidatattl.sh
-##############################################################
+#############################################################
 #
 # Generate a json dump for Wikidata and remove old ones.
 #
@@ -14,21 +14,25 @@
 targetFileGzip=$targetDir/$filename.ttl.gz
 targetFileBzip2=$targetDir/$filename.ttl.bz2
 failureFile=/tmp/dumpwikidatattl-failure
+mainLogFile=/var/log/wikidatadump/dumpwikidatattl-$filename-main.log
 
-i=0
 shards=4
 
 # Try to create the dump (up to three times).
 retries=0
 
 while true; do
+       i=0
        rm -f $failureFile
 
        while [ $i -lt $shards ]; do
                (
                        set -o pipefail
-                       php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpRdf.php --wiki wikidatawiki --shard $i --sharding-factor $shards --format ttl 2>> /var/log/wikidatadump/dumpwikidatattl-$filename-$i.log | gzip > $tempDir/wikidataTTL.$i.gz
-                       if [ $? -gt 0 ]; then
+                       errorLog=/var/log/wikidatadump/dumpwikidatattl-$filename-$i.log
+                       php $multiversionscript extensions/Wikidata/extensions/Wikibase/repo/maintenance/dumpRdf.php --wiki wikidatawiki --shard $i --sharding-factor $shards --format ttl 2>> $errorLog | gzip > $tempDir/wikidataTTL.$i.gz
+                       exitCode=$?
+                       if [ $exitCode -gt 0 ]; then
+                               echo -e "\n\nProcess failed with exit code $exitCode" >> $errorLog
                                echo 1 > $failureFile
                        fi
                ) &
@@ -40,8 +44,9 @@
        if [ -f $failureFile ]; then
                 # Something went wrong, let's clean up and maybe retry. Leave logs in place.
                rm -f $failureFile
-               rm $tempDir/wikidataTTL.*.gz
+               rm -f $tempDir/wikidataTTL.*.gz
                let retries++
+               echo "Dumping one or more shards failed. Retrying." >> $mainLogFile
 
                if [ $retries -eq 3 ]; then
                        exit 1
@@ -57,8 +62,18 @@
 
 i=0
 while [ $i -lt $shards ]; do
-       cat $tempDir/wikidataTTL.$i.gz >> $tempDir/wikidataTtl.gz
-       rm $tempDir/wikidataTTL.$i.gz
+       tempFile=$tempDir/wikidataTTL.$i.gz
+       if [ ! -f $tempFile ]; then
+               echo "$tempFile does not exist. Aborting." >> $mainLogFile
+               exit 1
+       fi
+       fileSize=`stat --printf="%s" $tempFile`
+       if [ $fileSize -lt 1800000000 ]; then
+               echo "File size of $tempFile is only $fileSize. Aborting." >> $mainLogFile
+               exit 1
+       fi
+       cat $tempFile >> $tempDir/wikidataTtl.gz
+       rm $tempFile
        let i++
 done
 

-- 
To view, visit https://gerrit.wikimedia.org/r/311551
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I42c2a51fa4580f4c848fe55c5833017cee79a5e8
Gerrit-PatchSet: 6
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Hoo man <h...@online.de>
Gerrit-Reviewer: ArielGlenn <ar...@wikimedia.org>
Gerrit-Reviewer: Aude <aude.w...@gmail.com>
Gerrit-Reviewer: Hoo man <h...@online.de>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to