This is an automated email from the ASF dual-hosted git repository.
xqhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new e646bc30729 use the script when validating the BQ table (#34617)
e646bc30729 is described below
commit e646bc307293a0067f1711df1711d63c1fd5ea6d
Author: liferoad <[email protected]>
AuthorDate: Sun Apr 13 08:27:09 2025 -0400
use the script when validating the BQ table (#34617)
---
.../run_rc_validation_python_mobile_gaming.yml | 105 ++----------------
scripts/tools/validate_table.sh | 117 +++++++++++++++++++++
2 files changed, 126 insertions(+), 96 deletions(-)
diff --git a/.github/workflows/run_rc_validation_python_mobile_gaming.yml b/.github/workflows/run_rc_validation_python_mobile_gaming.yml
index dc24281a368..c40b2a955f0 100644
--- a/.github/workflows/run_rc_validation_python_mobile_gaming.yml
+++ b/.github/workflows/run_rc_validation_python_mobile_gaming.yml
@@ -86,89 +86,6 @@ jobs:
GAME_STATS_WINDOW_DURATION: 20
SUBMISSION_TIMEOUT_SECONDS: 120 # Timeout for the python submission script itself
# --- Define the validation function with enhanced debugging (FIXED QUOTING) ---
- VALIDATE_TABLE_FUNC: |
- validate_table() {
- local table_name=$1
- echo "DEBUG: ===== Starting validate_table for table: $table_name
====="
- # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are
inherited)
- if [[ -z "$GCP_PROJECT_ID" || -z "$BQ_DATASET" ]]; then
- echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the
environment."
- exit 1
- fi
-
- local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}"
- local
full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}"
- local count=""
- local exit_code=1
- local retries=10
- local delay=60 # Default seconds between retries
-
- # Allow overriding delay via second argument (optional)
- if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then
- delay=$2
- echo "DEBUG: Using custom retry delay: ${delay}s for table
${table_name}"
- else
- echo "DEBUG: Using default retry delay: ${delay}s for table
${table_name}"
- fi
- echo "DEBUG: Full table ID: ${full_table_id}, Max retries:
${retries}"
-
- for i in $(seq 1 $retries); do
- echo "DEBUG: Starting attempt $i/$retries..."
- local query_output
-
- echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID}
--use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM
\`${full_table_id}\`\""
- query_output=$(bq query --project_id=${GCP_PROJECT_ID} \
- --use_legacy_sql=false \
- --format=sparse \
- --max_rows=1 \
- "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1)
- exit_code=$?
-
- echo "DEBUG: bq query exit code: $exit_code"
- echo "DEBUG: bq query raw output: [$query_output]"
-
- if [ $exit_code -eq 0 ]; then
- echo "DEBUG: bq query exited successfully (code 0)."
- count=$(echo "$query_output" | tail -n 1 | tr -d '[:space:]')
- echo "DEBUG: Processed count after removing whitespace (from
last line): [$count]"
- if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
- echo "DEBUG: Count [$count] is a positive integer.
Validation successful for this attempt."
- break # Success! Found non-zero rows
- else
- echo "DEBUG: Count [$count] is zero or not a positive
integer."
- if [[ "$count" == "0" ]]; then
- echo "DEBUG: Explicit count of 0 received."
- fi
- fi
- else
- echo "DEBUG: bq query failed (exit code: $exit_code)."
- echo "DEBUG: Checking table existence with bq show..."
- if ! bq show --project_id=${GCP_PROJECT_ID}
"${full_table_id_show}" > /dev/null 2>&1; then
- echo "DEBUG: Table ${full_table_id_show} appears not to
exist (bq show failed)."
- else
- echo "DEBUG: Table ${full_table_id_show} appears to exist
(bq show succeeded), but query failed."
- fi
- fi
-
- if [ $i -lt $retries ]; then
- echo "DEBUG: Validation condition not met on attempt $i.
Retrying in $delay seconds..."
- sleep $delay
- else
- echo "DEBUG: Final attempt ($i) failed."
- fi
- done
-
- echo "DEBUG: ===== Final validation check for table: $table_name
====="
- if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
- echo "SUCCESS: Table ${table_name} has ${count} rows. Final
validation OK."
- echo "DEBUG: validate_table returning 0 (success)."
- return 0 # Indicate success
- else
- echo "ERROR: Failed to get a non-zero row count for table
${table_name} after $retries retries (Last exit code: $exit_code, Last
processed count: '$count')."
- echo "DEBUG: validate_table returning 1 (failure)."
- return 1 # Indicate failure
- fi
- }
steps:
- name: Checkout code at RC tag
@@ -347,11 +264,10 @@ jobs:
- name: Validate Leaderboard Results (Direct Runner)
run: |
source beam_env/bin/activate
- eval "$VALIDATE_TABLE_FUNC"
echo "Validating BigQuery results for Leaderboard (DirectRunner)..."
sleep 90
- validate_table "leader_board_users" || exit 1
- validate_table "leader_board_teams" || exit 1
+ ./scripts/tools/validate_table.sh "leader_board_users" || exit 1
+ ./scripts/tools/validate_table.sh "leader_board_teams" || exit 1
echo "Leaderboard (Direct Runner) BQ validation finished
successfully."
shell: bash
@@ -426,11 +342,10 @@ jobs:
exit 0 # Exit step successfully to allow cancellation/cleanup
fi
source beam_env/bin/activate
- eval "$VALIDATE_TABLE_FUNC"
echo "Validating BigQuery results for Leaderboard
(DataflowRunner)..."
sleep 240
- validate_table "leader_board_users" 15 || exit 1 # Use 15s retry
delay
- validate_table "leader_board_teams" 15 || exit 1 # Use 15s retry
delay
+ ./scripts/tools/validate_table.sh "leader_board_users" 15 || exit 1 # Use 15s retry delay
+ ./scripts/tools/validate_table.sh "leader_board_teams" 15 || exit 1 # Use 15s retry delay
echo "Leaderboard (Dataflow Runner) BQ validation finished
successfully."
shell: bash
@@ -451,7 +366,7 @@ jobs:
echo "leaderboard_dataflow_jobid.txt not found, cannot cancel job
(it might have failed before ID extraction)."
fi
shell: bash
-
+
# ================== GameStats Tests ==================
- name: Run GameStats (Direct Runner) in Background
run: |
@@ -471,12 +386,11 @@ jobs:
- name: Validate GameStats Results (Direct Runner)
run: |
source beam_env/bin/activate
- eval "$VALIDATE_TABLE_FUNC"
echo "Validating BigQuery results for GameStats (DirectRunner)..."
echo "* Sleeping for 25mins"
sleep 25m
- validate_table "game_stats_teams" || exit 1
- validate_table "game_stats_sessions" || exit 1
+ ./scripts/tools/validate_table.sh "game_stats_teams" || exit 1
+ ./scripts/tools/validate_table.sh "game_stats_sessions" || exit 1
echo "GameStats (Direct Runner) BQ validation finished successfully."
shell: bash
@@ -552,12 +466,11 @@ jobs:
exit 0 # Exit step successfully to allow cleanup
fi
source beam_env/bin/activate
- eval "$VALIDATE_TABLE_FUNC"
echo "Validating BigQuery results for GameStats (DataflowRunner)..."
echo "* Sleeping for 25mins"
sleep 25m
- validate_table "game_stats_teams" 15 || exit 1 # Use 15s retry delay
- validate_table "game_stats_sessions" 15 || exit 1 # Use 15s retry
delay
+ ./scripts/tools/validate_table.sh "game_stats_teams" 15 || exit 1 # Use 15s retry delay
+ ./scripts/tools/validate_table.sh "game_stats_sessions" 15 || exit 1 # Use 15s retry delay
echo "GameStats (Dataflow Runner) BQ validation finished
successfully."
shell: bash
diff --git a/scripts/tools/validate_table.sh b/scripts/tools/validate_table.sh
new file mode 100755
index 00000000000..2815893d556
--- /dev/null
+++ b/scripts/tools/validate_table.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Function to validate if a BigQuery table exists and has rows.
+# Usage: validate_table <table_name> [retry_delay_seconds]
+# Exits with 0 if validation succeeds, 1 otherwise.
+# Requires GCP_PROJECT_ID and BQ_DATASET to be set in the environment.
+
+validate_table() {
+ local table_name=$1
+ echo "DEBUG: ===== Starting validate_table for table: $table_name ====="
+ # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are inherited)
+ if [[ -z "$GCP_PROJECT_ID" || -z "$BQ_DATASET" ]]; then
+ echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the
environment."
+ exit 1 # Exit script if env vars missing
+ fi
+
+ local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}"
+ local full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}"
+ local count=""
+ local exit_code=1
+ local retries=10
+ local delay=60 # Default seconds between retries
+
+ # Allow overriding delay via second argument (optional)
+ if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then
+ delay=$2
+ echo "DEBUG: Using custom retry delay: ${delay}s for table ${table_name}"
+ else
+ echo "DEBUG: Using default retry delay: ${delay}s for table
${table_name}"
+ fi
+ echo "DEBUG: Full table ID: ${full_table_id}, Max retries: ${retries}"
+
+ for i in $(seq 1 $retries); do
+ echo "DEBUG: Starting attempt $i/$retries..."
+ local query_output
+
+ echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID}
--use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM
\`${full_table_id}\`\""
+ query_output=$(bq query --project_id=${GCP_PROJECT_ID} \
+ --use_legacy_sql=false \
+ --format=sparse \
+ --max_rows=1 \
+ "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1)
+ exit_code=$?
+
+ echo "DEBUG: bq query exit code: $exit_code"
+ echo "DEBUG: bq query raw output: [$query_output]"
+
+ if [ $exit_code -eq 0 ]; then
+ echo "DEBUG: bq query exited successfully (code 0)."
+ count=$(echo "$query_output" | tail -n 1 | tr -d '[:space:]')
+ echo "DEBUG: Processed count after removing whitespace (from last
line): [$count]"
+ if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
+ echo "DEBUG: Count [$count] is a positive integer. Validation
successful for this attempt."
+ break # Success! Found non-zero rows
+ else
+ echo "DEBUG: Count [$count] is zero or not a positive integer."
+ if [[ "$count" == "0" ]]; then
+ echo "DEBUG: Explicit count of 0 received."
+ fi
+ fi
+ else
+ echo "DEBUG: bq query failed (exit code: $exit_code)."
+ echo "DEBUG: Checking table existence with bq show..."
+ if ! bq show --project_id=${GCP_PROJECT_ID} "${full_table_id_show}" >
/dev/null 2>&1; then
+ echo "DEBUG: Table ${full_table_id_show} appears not to exist (bq
show failed)."
+ else
+ echo "DEBUG: Table ${full_table_id_show} appears to exist (bq show
succeeded), but query failed."
+ fi
+ fi
+
+ if [ $i -lt $retries ]; then
+ echo "DEBUG: Validation condition not met on attempt $i. Retrying in
$delay seconds..."
+ sleep $delay
+ else
+ echo "DEBUG: Final attempt ($i) failed."
+ fi
+ done
+
+echo "DEBUG: ===== Final validation check for table: $table_name ====="
+ if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
+ echo "SUCCESS: Table ${table_name} has ${count} rows. Final validation OK."
+ echo "DEBUG: validate_table returning 0 (success)."
+ return 0 # Indicate success
+ else
+ echo "ERROR: Failed to get a non-zero row count for table ${table_name}
after $retries retries (Last exit code: $exit_code, Last processed count:
'$count')."
+ echo "DEBUG: validate_table returning 1 (failure)."
+ return 1 # Indicate failure
+ fi
+}
+
+# Allow the script to be sourced using "source ./script.sh"
+# and then call the function directly: "validate_table my_table 30"
+# If the script is executed directly, check if arguments are provided and call
the function.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+ if [[ $# -eq 0 ]]; then
+ echo "Usage: $0 <table_name> [retry_delay_seconds]"
+ echo "Requires GCP_PROJECT_ID and BQ_DATASET env vars."
+ exit 1
+ fi
+ validate_table "$@"
+fi