Hello Ottomata,

I'd like you to do a code review.  Please visit

    https://gerrit.wikimedia.org/r/188005

to review the following change.

Change subject: Prepare webrequest status dump script for daily datasets
......................................................................

Prepare webrequest status dump script for daily datasets

No daily dataset has been added yet. legacy_tsvs, the first daily dataset,
will be added in a follow-up commit.

Change-Id: I7c0ea9c2e0bc37ffe27a6d736ab9b478a3b3cfd3
---
M bin/refinery-dump-status-webrequest-partitions
1 file changed, 172 insertions(+), 39 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery 
refs/changes/05/188005/1

diff --git a/bin/refinery-dump-status-webrequest-partitions 
b/bin/refinery-dump-status-webrequest-partitions
index 44012a5..51348ce 100755
--- a/bin/refinery-dump-status-webrequest-partitions
+++ b/bin/refinery-dump-status-webrequest-partitions
@@ -46,26 +46,32 @@
 
 ALL_DATASETS=()
 
+declare -A DATASET_RECURRENCES
 declare -A DATASET_CAPTIONS
 declare -A DATASET_HLINES
+declare -A DATASET_BLANKS
 declare -A DATASET_VISIBILITIES
 
 add_dataset() {
     local DATASET="$1"
-    local DATASET_CAPTION="$2"
+    local DATASET_RECURRENCE="$2"
+    local DATASET_CAPTION="$3"
     local DATASET_HLINE="$(sed -e 's/[^|]/-/g; s/|/+/g' <<<"$DATASET_CAPTION")"
+    local DATASET_BLANK="${DATASET_HLINE//-/ }" ; 
DATASET_BLANK="${DATASET_BLANK//+/|}"
 
     ALL_DATASETS=( "${ALL_DATASETS[@]}" "$DATASET" )
 
+    DATASET_RECURRENCES["$DATASET"]="$DATASET_RECURRENCE"
     DATASET_CAPTIONS["$DATASET"]="$DATASET_CAPTION"
     DATASET_HLINES["$DATASET"]="$DATASET_HLINE"
+    DATASET_BLANKS["$DATASET"]="$DATASET_BLANK"
     DATASET_VISIBILITIES["$DATASET"]=no
 }
 
-add_dataset "pagecounts_all_sites" " file name date  |  page   | project |"
-add_dataset "pagecounts_raw" " file name date  |  page   | project |"
-add_dataset "raw_webrequest" "  bits  |  misc  | mobile |  text  | upload |"
-add_dataset "webrequest" "  bits  |  misc  | mobile |  text  | upload |"
+add_dataset "pagecounts_all_sites" "hourly" " file name date  |  page   | 
project |"
+add_dataset "pagecounts_raw" "hourly" " file name date  |  page   | project |"
+add_dataset "raw_webrequest" "hourly" "  bits  |  misc  | mobile |  text  | 
upload |"
+add_dataset "webrequest" "hourly" "  bits  |  misc  | mobile |  text  | upload 
|"
 
 DATASET_VISIBILITIES["raw_webrequest"]=yes
 
@@ -183,52 +189,138 @@
 hline() {
     local KIND="$1"
 
-    log_no_lf "  ++------------------++"
-    for DATASET in "${ALL_DATASETS[@]}"
-    do
-        if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" ]
-        then
-            local DATASET_HLINE="${DATASET_HLINES["$DATASET"]}"
-            if [ "$KIND" = "first" ]
+    log_no_lf "  "
+
+    # daily datasets first
+    if [ "$HAS_VISIBLE_DAILY_DATASETS" = yes ]
+    then
+        log_no_lf "++---------------++"
+        for DATASET in "${ALL_DATASETS[@]}"
+        do
+            if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" -a 
"${DATASET_RECURRENCES["$DATASET"]}" = "daily" ]
             then
-                DATASET_HLINE="${DATASET_HLINE//+-/--}"
+                local DATASET_HLINE="${DATASET_HLINES["$DATASET"]}"
+                if [ "$KIND" = "first" ]
+                then
+                    DATASET_HLINE="${DATASET_HLINE//+-/--}"
+                fi
+                log_no_lf "${DATASET_HLINE}+"
             fi
-            log_no_lf "${DATASET_HLINE}+"
-        fi
-    done
+        done
+    fi
+
+    # Now for the hourly datasets
+    if [ "$HAS_VISIBLE_HOURLY_DATASETS" = yes ]
+    then
+        log_no_lf "++------------------++"
+        for DATASET in "${ALL_DATASETS[@]}"
+        do
+            if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" -a 
"${DATASET_RECURRENCES["$DATASET"]}" = "hourly" ]
+            then
+                local DATASET_HLINE="${DATASET_HLINES["$DATASET"]}"
+                if [ "$KIND" = "first" ]
+                then
+                    DATASET_HLINE="${DATASET_HLINE//+-/--}"
+                fi
+                log_no_lf "${DATASET_HLINE}+"
+            fi
+        done
+    fi
     log
 }
 
 first_caption_line() {
     local DATASET
 
-    log_no_lf "  ||                  ||"
-    for DATASET in "${ALL_DATASETS[@]}"
-    do
-        if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" ]
-        then
-            local DATASET_CAPTION="${DATASET_CAPTIONS["$DATASET"]}"
-            local DATASET_CAPTION_LEN="${#DATASET_CAPTION}"
-            log_no_lf_centered "$DATASET" $((DATASET_CAPTION_LEN-1))
-            log_no_lf "||"
-        fi
-    done
+    log_no_lf "  "
+
+    # daily datasets first
+    if [ "$HAS_VISIBLE_DAILY_DATASETS" = yes ]
+    then
+        log_no_lf "||               ||"
+        for DATASET in "${ALL_DATASETS[@]}"
+        do
+            if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" -a 
"${DATASET_RECURRENCES["$DATASET"]}" = "daily" ]
+            then
+                local DATASET_CAPTION="${DATASET_CAPTIONS["$DATASET"]}"
+                local DATASET_CAPTION_LEN="${#DATASET_CAPTION}"
+                log_no_lf_centered "$DATASET" $((DATASET_CAPTION_LEN-1))
+                log_no_lf "||"
+            fi
+        done
+    fi
+
+    # Now for the hourly datasets
+    if [ "$HAS_VISIBLE_HOURLY_DATASETS" = yes ]
+    then
+        log_no_lf "||                  ||"
+        for DATASET in "${ALL_DATASETS[@]}"
+        do
+            if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" -a 
"${DATASET_RECURRENCES["$DATASET"]}" = "hourly" ]
+            then
+                local DATASET_CAPTION="${DATASET_CAPTIONS["$DATASET"]}"
+                local DATASET_CAPTION_LEN="${#DATASET_CAPTION}"
+                log_no_lf_centered "$DATASET" $((DATASET_CAPTION_LEN-1))
+                log_no_lf "||"
+            fi
+        done
+    fi
     log
 }
 
 second_caption_line() {
     local DATASET
 
-    log_no_lf "  || Hour             ||"
+    log_no_lf "  "
+
+    # daily datasets first
+    if [ "$HAS_VISIBLE_DAILY_DATASETS" = yes ]
+    then
+        log_no_lf "||      Day      ||"
+        for DATASET in "${ALL_DATASETS[@]}"
+        do
+            if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" -a 
"${DATASET_RECURRENCES["$DATASET"]}" = "daily" ]
+            then
+                log_no_lf "${DATASET_CAPTIONS["$DATASET"]}"
+                log_no_lf "|"
+            fi
+        done
+    fi
+
+    # Now for the hourly datasets
+    if [ "$HAS_VISIBLE_HOURLY_DATASETS" = yes ]
+    then
+        log_no_lf "||       Hour       ||"
+        for DATASET in "${ALL_DATASETS[@]}"
+        do
+            if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" -a 
"${DATASET_RECURRENCES["$DATASET"]}" = "hourly" ]
+            then
+                log_no_lf "${DATASET_CAPTIONS["$DATASET"]}"
+                log_no_lf "|"
+            fi
+        done
+    fi
+    log
+}
+
+determine_recurrence_visibility() {
+    HAS_VISIBLE_DAILY_DATASETS=no
+    HAS_VISIBLE_HOURLY_DATASETS=no
     for DATASET in "${ALL_DATASETS[@]}"
     do
         if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" ]
         then
-            log_no_lf "${DATASET_CAPTIONS["$DATASET"]}"
-            log_no_lf "|"
+            if [ "${DATASET_RECURRENCES["$DATASET"]}" = "daily" ]
+            then
+                HAS_VISIBLE_DAILY_DATASETS=yes
+            elif [ "${DATASET_RECURRENCES["$DATASET"]}" = "hourly" ]
+            then
+                HAS_VISIBLE_HOURLY_DATASETS=yes
+            else
+                error "Unknown recurrence '${DATASET_RECURRENCES["$DATASET"]}' 
for dataset '$DATASET'"
+            fi
         fi
     done
-    log
 }
 
 dump_dataset_pagecounts_file() {
@@ -328,6 +420,8 @@
     done
 }
 
+determine_recurrence_visibility
+
 hline "first"
 first_caption_line
 second_caption_line
@@ -343,16 +437,55 @@
         hline
     fi
 
-    log_no_lf "  || ${DATE// /T}/1H ||"
-    for DATASET in "${ALL_DATASETS[@]}"
-    do
-        if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" ]
+    # Check whether this hour produces any output at all. If not, omit it
+    # completely. Omitting it avoids 23 superfluous empty lines per day
+    # when only daily datasets are visible.
+    if [ \( "$HAS_VISIBLE_DAILY_DATASETS" = yes -a "${DATE: -2}" = "00" \) \
+        -o "$HAS_VISIBLE_HOURLY_DATASETS" = yes ]
+    then
+
+        log_no_lf "  "
+
+        # daily datasets first
+        if [ "$HAS_VISIBLE_DAILY_DATASETS" = yes ]
         then
-            dump_dataset_$DATASET "$DATE"
-            log_no_lf "|"
+            if [ "${DATE: -2}" = "00" ]
+            then
+                log_no_lf "|| ${DATE:0:10}/1D ||"
+            else
+                log_no_lf "||               ||"
+            fi
+
+            for DATASET in "${ALL_DATASETS[@]}"
+            do
+                if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" -a 
"${DATASET_RECURRENCES["$DATASET"]}" = "daily" ]
+                then
+                    if [ "${DATE: -2}" = "00" ]
+                    then
+                        dump_dataset_$DATASET "$DATE"
+                    else
+                        log_no_lf "${DATASET_BLANKS["$DATASET"]]}"
+                    fi
+                    log_no_lf "|"
+                fi
+            done
         fi
-    done
-    log
+
+        # Now for the hourly datasets
+        if [ "$HAS_VISIBLE_HOURLY_DATASETS" = yes ]
+        then
+            log_no_lf "|| ${DATE// /T}/1H ||"
+            for DATASET in "${ALL_DATASETS[@]}"
+            do
+                if [ "${DATASET_VISIBILITIES["$DATASET"]}" = "yes" -a 
"${DATASET_RECURRENCES["$DATASET"]}" = "hourly" ]
+                then
+                    dump_dataset_$DATASET "$DATE"
+                    log_no_lf "|"
+                fi
+            done
+        fi
+        log
+    fi
 done
 
 hline

-- 
To view, visit https://gerrit.wikimedia.org/r/188005
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I7c0ea9c2e0bc37ffe27a6d736ab9b478a3b3cfd3
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to