Volans has submitted this change and it was merged.
Change subject: DBtools: add script to check external storage
......................................................................
DBtools: add script to check external storage
Usage: ${0} -s START_TIMESTAMP -e END_TIMESTAMP -r REVISION_DB
-o ORIGIN_DB -t TARGET_DB -c CLUSTER -d DATABASES_FILE
Example: ${0} -s 20160131235959 -e 20160201020000
-r dbstore1002.eqiad.wmnet -o es1019.eqiad.wmnet
-t es2019.codfw.wmnet -c 25 -d databases.txt
External Storage check script to ensure that the content of a group of
blobs id and text content is the same between the ORIGIN_DB server and
the TARGET_DB one for all revisions created between START_TIMESTAMP
and END_TIMESTAMP for the given CLUSTER.
The revision table to get the timestamps is checked on the REVISION_DB
server.
DATABASES_FILE is expected to be a file with all the databases to be
checked, one per line.
Timestamps must be in the format used by MediaWiki: YYYYmmddHHMMSS
All parameters are mandatory.
Bug: T130702
Change-Id: I8df901bbed858117393b182920bae52e07550ca2
---
A dbtools/external_storage_consistency_check.sh
1 file changed, 157 insertions(+), 0 deletions(-)
Approvals:
Jcrespo: Looks good to me, but someone else must approve
Volans: Verified; Looks good to me, approved
diff --git a/dbtools/external_storage_consistency_check.sh
b/dbtools/external_storage_consistency_check.sh
new file mode 100755
index 0000000..b459c87
--- /dev/null
+++ b/dbtools/external_storage_consistency_check.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+function usage() {
+ cat << USAGE
+Usage: ${0} -s START_TIMESTAMP -e END_TIMESTAMP -r REVISION_DB -o ORIGIN_DB
+ -t TARGET_DB -c CLUSTER -d DATABASES_FILE
+
+Example: ${0} -s 20160131235959 -e 20160201020000 -r dbstore1002.eqiad.wmnet
+ -o es1019.eqiad.wmnet -t es2019.codfw.wmnet -c 25 -d databases.txt
+
+External Storage check script to ensure that the content of a group of blobs id
+and text content is the same between the ORIGIN_DB server and the TARGET_DB one
+for all revisions created between START_TIMESTAMP and END_TIMESTAMP for the
+given CLUSTER.
+The revision table to get the timestamps is checked on the REVISION_DB server.
+DATABASES_FILE is expected to be a file with all the databases to be checked,
+one per line.
+Timestamps must be in the format used by MediaWiki: YYYYmmddHHMMSS
+All parameters are mandatory.
+USAGE
+ exit 1
+}
+
+function delta_time() {
+ echo "$(date +"%s.%N") ${START}" | awk '{printf "%.3f", $1 - $2}'
+}
+
+while getopts ":s:e:r:o:t:c:d:" OPT; do
+ case ${OPT} in
+ s) START_TIMESTAMP="${OPTARG}" ;;
+ e) END_TIMESTAMP="${OPTARG}" ;;
+ r) REVISION_DB="${OPTARG}" ;;
+ o) ORIGIN_DB="${OPTARG}" ;;
+ t) TARGET_DB="${OPTARG}" ;;
+ c) CLUSTER="${OPTARG}" ;;
+ d) DATABASES_FILE="${OPTARG}" ;;
+ \?)
+ echo "Invalid option: -${OPTARG}" >&2
+ usage
+ ;;
+ :)
+ echo "Option -${OPTARG} requires an argument." >&2
+ usage
+ ;;
+ esac
+done
+
+if [[ -z "${START_TIMESTAMP}" ]]; then
+ echo "Missing mandatory argument -s START_TIMESTAMP"
+ usage
+fi
+
+if [[ -z "${END_TIMESTAMP}" ]]; then
+ echo "Missing mandatory argument -e END_TIMESTAMP"
+ usage
+fi
+
+if [[ -z "${REVISION_DB}" ]]; then
+ echo "Missing mandatory argument -r REVISION_DB"
+ usage
+fi
+
+if [[ -z "${ORIGIN_DB}" ]]; then
+ echo "Missing mandatory argument -o ORIGIN_DB"
+ usage
+fi
+
+if [[ -z "${TARGET_DB}" ]]; then
+ echo "Missing mandatory argument -t TARGET_DB"
+ usage
+fi
+
+if [[ -z "${CLUSTER}" ]]; then
+ echo "Missing mandatory argument -c CLUSTER"
+ usage
+fi
+
+if [[ -z "${DATABASES_FILE}" || ! -e "${DATABASES_FILE}" ]]; then
+ echo "Missing mandatory argument -d DATABASES_FILE or DATABASES_FILE does
not exists"
+ usage
+fi
+
+COUNT=0
+OK=0
+NO_REV=0
+DIFF=0
+NO_BLOB=0
+ERROR=0
+INVERTED=0
+
+while read DB; do
+ START="$(date +"%s.%N")"
+ COUNT=$((COUNT + 1))
+
+ echo -n "Checking ${DB}: "
+ MIN_PAGE="$(mysql -h "${REVISION_DB}" --batch --skip-column-names -e
"SELECT rev_text_id FROM revision WHERE rev_timestamp > '${START_TIMESTAMP}'
LIMIT 1" "${DB}")"
+ if [[ "${?}" -ne "0" ]]; then
+ ERROR=$((ERROR + 1))
+ echo "Unable to get revisions ($(delta_time)s)"
+ continue
+ fi
+
+ MAX_PAGE="$(mysql -h "${REVISION_DB}" --batch --skip-column-names -e
"SELECT rev_text_id FROM revision WHERE rev_timestamp > '${END_TIMESTAMP}'
LIMIT 1" "${DB}")"
+
+ if [[ -z "${MIN_PAGE}" || -z "${MAX_PAGE}" || "${MIN_PAGE}" ==
"${MAX_PAGE}" ]]; then
+ NO_REV=$((NO_REV + 1))
+ echo "No revisions to analyze ($(delta_time)s)"
+ continue
+ fi
+
+ MIN_BLOB="$(mysql -h "${REVISION_DB}" --batch --skip-column-names -e
"SELECT SUBSTRING_INDEX(old_text, '/', -1) FROM text WHERE old_id =
${MIN_PAGE}" "${DB}")"
+ MAX_BLOB="$(mysql -h "${REVISION_DB}" --batch --skip-column-names -e
"SELECT SUBSTRING_INDEX(old_text, '/', -1) FROM text WHERE old_id =
${MAX_PAGE}" "${DB}")"
+
+ if [[ -z "${MIN_BLOB}" || -z ${MAX_BLOB} ]]; then
+ NO_BLOB=$((NO_BLOB + 1))
+ echo "Unable to find blob_id for revision ${MIN_PAGE} or ${MAX_PAGE}
($(delta_time)s)"
+ continue
+ fi
+
+ if [[ "${MIN_BLOB}" -gt "${MAX_BLOB}" ]]; then
+ INVERTED=$((INVERTED + 1))
+ echo -n "INVERTED "
+ TMP_BLOB="${MIN_BLOB}"
+ MIN_BLOB="${MAX_BLOB}"
+ MAX_BLOB="${TMP_BLOB}"
+ fi
+
+ # Get from origin
+ mysql -h "${ORIGIN_DB}" --batch --skip-column-names -e "SELECT blob_id,
MD5(blob_text) FROM blobs_cluster${CLUSTER} WHERE blob_id BETWEEN ${MIN_BLOB}
AND ${MAX_BLOB}" "${DB}" > "${DB}_${ORIGIN_DB}.out"
+ # Get from target
+ mysql -h "${TARGET_DB}" --batch --skip-column-names -e "SELECT blob_id,
MD5(blob_text) FROM blobs_cluster${CLUSTER} WHERE blob_id BETWEEN ${MIN_BLOB}
AND ${MAX_BLOB}" "${DB}" > "${DB}_${TARGET_DB}.out"
+
+ # Compare
+ diff -q "${DB}_${ORIGIN_DB}.out" "${DB}_${TARGET_DB}.out"
+ if [[ "${?}" -eq "0" ]]; then
+ OK=$((OK + 1))
+ echo "OK ($(delta_time)s)"
+ else
+ DIFF=$((DIFF + 1))
+ echo "DIFF ($(delta_time)s)"
+ fi
+
+ sleep 1
+
+done < "${DATABASES_FILE}"
+
+echo "==== SUMMARY ===="
+echo "Analyzed ${COUNT} wikis"
+echo "rev_text_id needed to be inverted for ${INVERTED} wikis"
+echo "OK: ${OK} wikis"
+echo "No revisions to check: ${NO_REV} wikis"
+echo "No blob found for revision: ${NO_BLOB} wikis"
+echo "With differences: ${DIFF} wikis"
+echo "With error getting revisions: ${ERROR} wikis"
+
+exit 0
+
--
To view, visit https://gerrit.wikimedia.org/r/283946
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8df901bbed858117393b182920bae52e07550ca2
Gerrit-PatchSet: 1
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: Volans <[email protected]>
Gerrit-Reviewer: Jcrespo <[email protected]>
Gerrit-Reviewer: Volans <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits