Milimetric has submitted this change and it was merged.

Change subject: Allow to reuse local checkouts of the data repositories
......................................................................


Allow to reuse local checkouts of the data repositories

Change-Id: I26adf851d8506fbe5226e9436573da249d11087b
---
M scripts/check_web_page.sh
1 file changed, 77 insertions(+), 2 deletions(-)

Approvals:
  Milimetric: Verified; Looks good to me, approved



diff --git a/scripts/check_web_page.sh b/scripts/check_web_page.sh
index b942168..0ccabad 100755
--- a/scripts/check_web_page.sh
+++ b/scripts/check_web_page.sh
@@ -87,6 +87,11 @@
 USE_CACHE=no
 #USE_CACHE=yes
 
+# Add directories here that are checkouts of the data repositories.
+# The script tries to copy data over from them instead of downloading
+# it from the web. This is mostly useful for debugging.
+LOCAL_DATA_CHECKOUTS_DIR_RELI=()
+
 #---------------------------------------------------
 # Prints the script's help screen
 #
@@ -105,6 +110,12 @@
 
 OPTIONS:
 --help, -h       -- prints this help page
+--add-checkout DIR
+                 -- before downloading files, try to find them in
+                    DIR. You can pass this option multiple times.
+                    This is useful for debugging, with DIR being a
+                    checkout of the geowiki-data or dashboard-data
+                    repository.
 --cache          -- cache the downloaded documents into /tmp/geowiki_monitor...
                     and reuse them on subsequent runs. This is useful
                     for debugging the script. But you'll have to
@@ -123,6 +134,7 @@
 #
 # Output:
 #   USE_CACHE
+#   LOCAL_DATA_CHECKOUTS_DIR_RELI
 #   VERBOSITY
 #
 parse_arguments() {
@@ -134,6 +146,11 @@
             "--help" | "-h" )
                 print_help
                 exit 0
+                ;;
+            "--add-checkout" )
+               [[ $# -ge 1 ]] || error "$ARGUMENT requires a further parameter"
+               LOCAL_DATA_CHECKOUTS_DIR_RELI=( "${LOCAL_DATA_CHECKOUTS_DIR_RELI[@]}" "$1" )
+               shift
                 ;;
             "--cache" )
                 USE_CACHE="yes"
@@ -258,8 +275,66 @@
 #
 do_download_file() {
     local URL="$1"
-    log "$VERBOSITY_VERBOSE" "Downloading $URL ..."
-    wget -O "$DOWNLOADED_FILE_ABS" -o /dev/null "$URL"
+
+    # LOCAL_COPY_FILE_RELI is either empty (if no local copy of $URL
+    # has yet been found), or it holds the file name of the found
+    # local copy of $URL.
+    local LOCAL_COPY_FILE_RELI=
+
+    # The file we try to find on local data checkouts.
+    local NEEDLE="$URL"
+    # Strip leading URL_BASE if present
+    if [ "${NEEDLE:0:${#URL_BASE}}" = "${URL_BASE}" ]
+    then
+       NEEDLE="${NEEDLE:${#URL_BASE}}"
+    fi
+    # Strip data and gp from /data/.../gp/ paths. This helps finding
+    # data files.
+    NEEDLE="$(echo "${NEEDLE}" | sed -e 's@^/data/\(.*\)/gp/@\1/@')"
+
+    for LOCAL_DATA_CHECKOUT_DIR_RELI in "${LOCAL_DATA_CHECKOUTS_DIR_RELI[@]}"
+    do
+       local CANDIDATE_FILE_RELI=
+
+       # Check for direct match
+       if [ -z "$LOCAL_COPY_FILE_RELI" ]
+       then
+           local CANDIDATE_FILE_RELI="$LOCAL_DATA_CHECKOUT_DIR_RELI/$NEEDLE"
+           if [ -e "$CANDIDATE_FILE_RELI" ]
+           then
+               LOCAL_COPY_FILE_RELI="$CANDIDATE_FILE_RELI"
+           fi
+       fi
+
+       # Check for json extension match. This helps to find dashboards.
+       if [ -z "$LOCAL_COPY_FILE_RELI" ]
+       then
+           local CANDIDATE_FILE_RELI="$LOCAL_DATA_CHECKOUT_DIR_RELI/$NEEDLE.json"
+           if [ -e "$CANDIDATE_FILE_RELI" ]
+           then
+               LOCAL_COPY_FILE_RELI="$CANDIDATE_FILE_RELI"
+           fi
+       fi
+
+       # Check for handmade matches
+       if [ -z "$LOCAL_COPY_FILE_RELI" ]
+       then
+           local CANDIDATE_FILE_RELI="$LOCAL_DATA_CHECKOUT_DIR_RELI/$NEEDLE"
+           CANDIDATE_FILE_RELI="${CANDIDATE_FILE_RELI/graphs/graphs/handmade}"
+           if [ -e "$CANDIDATE_FILE_RELI" ]
+           then
+               LOCAL_COPY_FILE_RELI="$CANDIDATE_FILE_RELI"
+           fi
+       fi
+    done
+
+    if [ ! -z "$LOCAL_COPY_FILE_RELI" -a -e "$LOCAL_COPY_FILE_RELI" ]
+    then
+       cp "$LOCAL_COPY_FILE_RELI" "$DOWNLOADED_FILE_ABS"
+    else
+       log "$VERBOSITY_VERBOSE" "Downloading $URL ..."
+       wget -O "$DOWNLOADED_FILE_ABS" -o /dev/null "$URL"
+    fi
 }
 
 #---------------------------------------------------

-- 
To view, visit https://gerrit.wikimedia.org/r/85610
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I26adf851d8506fbe5226e9436573da249d11087b
Gerrit-PatchSet: 2
Gerrit-Project: analytics/geowiki
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Milimetric <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to