Gehel has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/399954 )

Change subject: Add loading DCAT-AP data into dcatap namespace on WDQS
......................................................................


Add loading DCAT-AP data into dcatap namespace on WDQS

Bug: T178978
Change-Id: Ie1d7eb7567d1409fb1116a71b23b4d5160aa2c7b
---
A modules/wdqs/files/cron/cronUtils.sh
M modules/wdqs/files/cron/reloadCategories.sh
A modules/wdqs/files/cron/reloadDCAT-AP.sh
M modules/wdqs/manifests/gui.pp
4 files changed, 118 insertions(+), 31 deletions(-)

Approvals:
  jenkins-bot: Verified
  Gehel: Looks good to me, approved



diff --git a/modules/wdqs/files/cron/cronUtils.sh b/modules/wdqs/files/cron/cronUtils.sh
new file mode 100755
index 0000000..4d2487a
--- /dev/null
+++ b/modules/wdqs/files/cron/cronUtils.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Cron helper functions
+if [ -r /etc/wdqs/vars.sh ]; then
+  . /etc/wdqs/vars.sh
+fi
+
+if [ -r /etc/wdqs/gui_vars.sh ]; then
+  . /etc/wdqs/gui_vars.sh
+fi
+
+if [ -z "${DATA_DIR}" -o -z "${LOG_DIR}" -o -z "${DEPLOY_DIR}" ]; then
+       echo "Variables not set up right!"
+       exit 1
+fi
+
+HOST="http://localhost:9999";
+NAMESPACE_URL="/bigdata/namespace/"
+today=$(date -u +'%Y%m%d')
+
+function loadFileIntoBlazegraph {
+       # Download a file into DATA_DIR and LOAD it into a Blazegraph namespace
+       local URL=$1            # source URL
+       local fileName=$2       # local filename (will be in DATA_DIR)
+       local sparqlEndpoint=$3 # SPARQL endpoint that receives the LOAD update
+       curl -s -f -XGET "$URL" -o "${DATA_DIR}/${fileName}"
+       if [ ! -s "${DATA_DIR}/${fileName}" ]; then
+               echo "Could not download $URL into ${fileName}"
+               exit 1
+       fi
+       # Load the file just downloaded; path is built from the local fileName
+       curl -s -XPOST --data-binary update="LOAD <file://${DATA_DIR}/${fileName}>" "$sparqlEndpoint"
+}
+
+# NOTE: This should be run under user that has rights to
+# sudo systemctl reload nginx
+function replaceNamespace {
+       # Point alias $1 at namespace $2 in ALIAS_FILE, then drop the old one
+       local mainName=$1 currentAlias=$2 oldNamespace
+       oldNamespace=$(grep -- "$mainName" "$ALIAS_FILE" | cut -d' ' -f2 | cut -d ';' -f1)
+       if [ "${oldNamespace}" = "${currentAlias}" ]; then
+               # nothing to do
+               return
+       fi
+       if [ -n "${oldNamespace}" ]; then
+               sed -i "/${mainName}/c ${mainName} ${currentAlias};" "$ALIAS_FILE"
+       else
+               echo "${mainName} ${currentAlias};" >> "$ALIAS_FILE"
+       fi
+       # Bump nginx to reload config
+       sudo systemctl reload nginx
+       if [ -n "${oldNamespace}" ]; then
+               # Drop old namespace
+               curl -s -X DELETE "${HOST}${NAMESPACE_URL}${oldNamespace}"
+       fi
+}
\ No newline at end of file
diff --git a/modules/wdqs/files/cron/reloadCategories.sh b/modules/wdqs/files/cron/reloadCategories.sh
index 33b83e9..2710a19 100755
--- a/modules/wdqs/files/cron/reloadCategories.sh
+++ b/modules/wdqs/files/cron/reloadCategories.sh
@@ -2,35 +2,14 @@
 # This script is reloading categories into a new namespace
 # NOTE: This should be run under user that has rights to
 # sudo systemctl reload nginx
-if [ -r /etc/wdqs/vars.sh ]; then
-  . /etc/wdqs/vars.sh
-fi
+. /usr/local/bin/cronUtils.sh
 
-if [ -r /etc/wdqs/gui_vars.sh ]; then
-  . /etc/wdqs/gui_vars.sh
-fi
-
-if [ -z "${DATA_DIR}" -o -z "${LOG_DIR}" -o -z "${DEPLOY_DIR}" ]; then
-       echo "Variables not set up right!"
-       exit 1
-fi
-
-today=$(date -u +'%Y%m%d')
 newNamespace="categories${today}"
 # Drop old dumps
 rm -f ${DATA_DIR}/*-categories.ttl.gz
 cd $DEPLOY_DIR
+# Create new namespace
 bash createNamespace.sh $newNamespace || exit 1
 # Load the data
 bash forAllCategoryWikis.sh loadCategoryDump.sh $newNamespace >> "${LOG_DIR}/${newNamespace}.log"
-# Get old namespace
-oldNamespace=$(cat $ALIAS_FILE | grep categories | cut -d' ' -f2 | cut -d ';' -f1)
-# Switch the map
-# NOTE: right now it overrides the map. If we reuse it for other purposes, this needs to be made smarter.
-echo "categories ${newNamespace};" > $ALIAS_FILE
-# Bump nginx to reload config
-sudo systemctl reload nginx
-if [ -n "${oldNamespace}" ]; then
-       # Drop old namespace
-       curl -s -X DELETE "http://localhost:9999/bigdata/namespace/${oldNamespace}";
-fi
+replaceNamespace categories $newNamespace
diff --git a/modules/wdqs/files/cron/reloadDCAT-AP.sh b/modules/wdqs/files/cron/reloadDCAT-AP.sh
new file mode 100755
index 0000000..618bddc
--- /dev/null
+++ b/modules/wdqs/files/cron/reloadDCAT-AP.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# This script is reloading DCAT-AP data from Wikidata
+# NOTE: This should be run under user that has rights to
+# sudo systemctl reload nginx
+. /usr/local/bin/cronUtils.sh
+
+DCAT_SOURCE=${DCAT_SOURCE:-"https://dumps.wikimedia.org/wikidatawiki/entities/dcatap.rdf"}
+
+newNamespace="dcatap${today}"
+# Drop old dumps
+rm -f ${DATA_DIR}/dcatap-*.rdf
+cd $DEPLOY_DIR
+# Create new NS
+bash createNamespace.sh $newNamespace || exit 1
+# Load the data
+FILENAME=dcatap-${today}.rdf
+loadFileIntoBlazegraph $DCAT_SOURCE $FILENAME "${HOST}${NAMESPACE_URL}${newNamespace}/sparql"
+replaceNamespace dcatap $newNamespace
diff --git a/modules/wdqs/manifests/gui.pp b/modules/wdqs/manifests/gui.pp
index 458e38e..d056705 100644
--- a/modules/wdqs/manifests/gui.pp
+++ b/modules/wdqs/manifests/gui.pp
@@ -32,7 +32,7 @@
     }
 
     # List of namespace aliases in format:
-    # ALIAS REAL_NAME
+    # ALIAS REAL_NAME;
     # This map is generated manually or by category update script
     file { $alias_map:
         ensure => present,
@@ -59,13 +59,28 @@
         mode    => '0644',
     }
 
+    file { '/usr/local/bin/cronUtils.sh':
+        ensure => present,
+        source => 'puppet:///modules/wdqs/cron/cronUtils.sh',
+        owner  => 'root',
+        group  => 'root',
+        mode   => '0755',
+    }
+
     file { '/usr/local/bin/reloadCategories.sh':
-        ensure  => present,
-        source  => 'puppet:///modules/wdqs/cron/reloadCategories.sh',
-        owner   => 'root',
-        group   => 'root',
-        mode    => '0755',
-        require => File['/etc/wdqs/gui_vars.sh'],
+        ensure => present,
+        source => 'puppet:///modules/wdqs/cron/reloadCategories.sh',
+        owner  => 'root',
+        group  => 'root',
+        mode   => '0755',
+    }
+
+    file { '/usr/local/bin/reloadDCAT-AP.sh':
+        ensure => present,
+        source => 'puppet:///modules/wdqs/cron/reloadDCAT-AP.sh',
+        owner  => 'root',
+        group  => 'root',
+        mode   => '0755',
     }
 
     $cron_log = "${log_dir}/reloadCategories.log"
@@ -88,6 +103,15 @@
         hour    => fqdn_rand(2),
     }
 
+    cron { 'reload-dcatap':
+        ensure  => present,
+        command => "/usr/local/bin/reloadDCAT-AP.sh >> ${log_dir}/dcat.log",
+        user    => $username,
+        weekday => 4,
+        minute  => 0,
+        hour    => 10,
+    }
+
     logrotate::rule { 'wdqs-reload-categories':
         ensure       => present,
         file_glob    => $cron_log,
@@ -99,6 +123,17 @@
         su           => "${username} wikidev",
     }
 
+    logrotate::rule { 'wdqs-reload-dcat':
+        ensure       => present,
+        file_glob    => "${log_dir}/dcat.log", # file the reload-dcatap cron job appends to
+        frequency    => 'monthly',
+        missing_ok   => true,
+        not_if_empty => true,
+        rotate       => 3,
+        compress     => true,
+        su           => "${username} wikidev",
+    }
+
     # Remove categories*.log files after 30 days.
     logrotate::rule { 'wdqs-categories-logs':
         ensure       => present,

-- 
To view, visit https://gerrit.wikimedia.org/r/399954
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Ie1d7eb7567d1409fb1116a71b23b4d5160aa2c7b
Gerrit-PatchSet: 7
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Smalyshev <smalys...@wikimedia.org>
Gerrit-Reviewer: Gehel <guillaume.leder...@wikimedia.org>
Gerrit-Reviewer: Lokal Profil <lokal.pro...@gmail.com>
Gerrit-Reviewer: Smalyshev <smalys...@wikimedia.org>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to