Ottomata has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/334435 )

Change subject: Add hardsync shell script
......................................................................

Add hardsync shell script

This uses hardlinks to sync the contents of multiple source directories
into a single directory.

Bug: T125854
Change-Id: I58fbe3242a5895d288a2170df6226e0cf7cf09a1
---
A modules/statistics/files/hardsync.sh
M modules/statistics/manifests/web.pp
2 files changed, 151 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/puppet 
refs/changes/35/334435/1

diff --git a/modules/statistics/files/hardsync.sh 
b/modules/statistics/files/hardsync.sh
new file mode 100755
index 0000000..9573cf2
--- /dev/null
+++ b/modules/statistics/files/hardsync.sh
@@ -0,0 +1,140 @@
+#!/bin/bash
+
+# Sync multiple source directory contents into one destination using hardlinks.
+# During each subsequent run, the destination directory
+# will be created anew and then replace the original.
+
+script_name=$(basename $0)
+
+function usage {
+    echo "
+${script_name} [-h] [-v] [-n] [-t <base-temp-directory>] SRC... DEST
+
+OPTIONS:
+  -h  Print this usage message
+  -v  Enable verbose logging of shell commands that are run
+  -n  perform a dry run with no changes made
+  -t  The base directory to create temp directories in.  Default: /tmp
+
+DESCRIPTION:
+  Syncs multiple source directories into a final destination directory
+  using hard links. The final destination directory will be
+  re-created from the source directories each run of this command.
+  The original destination directory will be moved away and the newly
+  created and hard synced destination directory will be moved to its place.
+  The original will then be deleted.  Since all of this uses hard links
+  (via cp -al), the extra 'copies' of these directories will not take
+  up (much) additional filesystem space.
+
+  ${script_name} is useful if you want to present the contents of multiple
+  directories as one, but still want to allow people to delete contents out
+  of the source directories.  If it weren't for the delete problem, rsync
+  would be sufficient to solve this problem.
+
+EXAMPLE:
+  ${script_name} /my/dir1 /my/dir2 /my/dest
+
+  This will copy the contents of /my/dir1/* and /my/dir2/* into
+  /my/dest/ as hardlinks.  WARNING: /my/dest as is before
+  this command runs will be deleted.  A new /my/dest will be moved
+  into its place containing a fresh hardlink sync from the
+  source directories.
+"
+
+exit 0
+}
+
+
+# Exit if any error is encountered
+set -e
+
+# Echos $@ to stdout prepended by a timestamp
+function log {
+    echo $(date +"%Y-%m-%dT%H:%M:%S") "$@"
+}
+
+# Logs $@ to stdout prepended by ERROR, and then exit 1.
+function fatal {
+    log ERROR: $@
+    exit 1
+}
+
+# Run "$@" as a command, passing every argument through exactly as given.
+# If $verbose is 1, log the command first.
+# If $dry_run is not 0, don't actually run it.
+function cmd {
+    if [ "$verbose" -eq 1 ]; then
+        log "$@"
+    fi
+    if [ "$dry_run" -eq 0 ]; then
+        "$@"
+    fi
+}
+
+
+verbose=0
+dry_run=0
+base_temp_dir=/tmp
+
+while getopts "hvnt:" opt; do
+    case "$opt" in
+    h)  usage ;;
+    v)  verbose=1 ;;
+    n)  dry_run=1 ;;
+    t)  base_temp_dir=$OPTARG ;;
+    *)
+        # getopts has already printed a diagnostic.  Abort rather than
+        # carry on with a destructive sync the caller did not ask for,
+        # e.g. when a mistyped -n would otherwise become a real run.
+        fatal "Invalid or incomplete option. See '${script_name} -h'."
+        ;;
+    esac
+done
+
+shift $((OPTIND-1))
+
+[ "$1" = "--" ] && shift
+
+# At least one source directory plus the destination are required.  Check
+# the count before slicing "$@": a negative slice length is a bash error.
+if [ "$#" -lt 2 ] || [ -z "${!#}" ]; then
+    fatal "Must specify at least one source directory and exactly one destination directory. Aborting."
+fi
+
+# All but the last argument are sources; an array keeps odd paths intact.
+source_dirs=("${@:1:$#-1}")
+dest_dir="${!#}"
+
+# Check that all source directories exist and are directories.
+for source_dir in "${source_dirs[@]}"; do
+    test -d "$source_dir" || fatal "Source directory '${source_dir}' is not a directory. Aborting."
+done
+
+# Begin hard syncing
+log "Hard syncing ${source_dirs[*]} into $dest_dir..."
+
+# In a dry run, mktemp -u only generates names and creates nothing.
+mktemp_dry_run=''
+if [ "$dry_run" -eq 1 ]; then
+    mktemp_dry_run='u'
+fi
+dest_name=$(basename -- "$dest_dir")
+
+# Make a temporary new destination directory in which to hardsync the sources.
+temp_dest=$(mktemp "-d${mktemp_dry_run}" "${base_temp_dir}/.hardsync.${dest_name}.XXXXXXXXXXXX")
+
+# Later, before we mv $temp_dest to $dest_dir, the old $dest_dir will be moved to $temp_dest_trash.
+# After $temp_dest has moved to $dest_dir, we can delete $temp_dest_trash
+temp_dest_trash=$(mktemp "-d${mktemp_dry_run}" "${base_temp_dir}/.hardsync.${dest_name}.trash.XXXXXXXXXXXX")
+
+# cp -al each source dir into temp dest (hard links cannot cross filesystems).
+for source_dir in "${source_dirs[@]}"; do
+    cmd cp -al "$source_dir"/* "$temp_dest"/
+done
+
+#  Remove any existent $dest_dir and mv $temp_dest to $dest_dir
+test -e "$dest_dir" && cmd mv -f "$dest_dir" "$temp_dest_trash"
+cmd mv -f "$temp_dest" "$dest_dir"
+cmd rm -rf "$temp_dest_trash"
+
+log "Finished hard syncing ${source_dirs[*]} into $dest_dir"
diff --git a/modules/statistics/manifests/web.pp 
b/modules/statistics/manifests/web.pp
index b3211b5..90fa25f 100644
--- a/modules/statistics/manifests/web.pp
+++ b/modules/statistics/manifests/web.pp
@@ -22,6 +22,17 @@
         'zip',
     ])
 
+    # Install the hardsync shell script as /usr/local/bin/hardsync.
+    # It presents the contents of multiple source directories in a single
+    # directory by hardlink copying the files into the destination.
+    # This is mainly used so dataset files from multiple stat* boxes can
+    # be published in a single directory.  See: T125854
+    file { '/usr/local/bin/hardsync':
+        source => 'puppet:///modules/statistics/hardsync.sh',
+        mode   => '0755',
+        owner  => 'root',
+        group  => 'root',
+    }
 
     include ::apache::mod::rewrite
     include ::apache::mod::proxy

-- 
To view, visit https://gerrit.wikimedia.org/r/334435
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I58fbe3242a5895d288a2170df6226e0cf7cf09a1
Gerrit-PatchSet: 1
Gerrit-Project: operations/puppet
Gerrit-Branch: production
Gerrit-Owner: Ottomata <ao...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to