This is an automated email from the ASF dual-hosted git repository.

xqhu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new e646bc30729 use the script when validating the BQ table (#34617)
e646bc30729 is described below

commit e646bc307293a0067f1711df1711d63c1fd5ea6d
Author: liferoad <[email protected]>
AuthorDate: Sun Apr 13 08:27:09 2025 -0400

    use the script when validating the BQ table (#34617)
---
 .../run_rc_validation_python_mobile_gaming.yml     | 105 ++----------------
 scripts/tools/validate_table.sh                    | 117 +++++++++++++++++++++
 2 files changed, 126 insertions(+), 96 deletions(-)

diff --git a/.github/workflows/run_rc_validation_python_mobile_gaming.yml b/.github/workflows/run_rc_validation_python_mobile_gaming.yml
index dc24281a368..c40b2a955f0 100644
--- a/.github/workflows/run_rc_validation_python_mobile_gaming.yml
+++ b/.github/workflows/run_rc_validation_python_mobile_gaming.yml
@@ -86,89 +86,6 @@ jobs:
       GAME_STATS_WINDOW_DURATION: 20
       SUBMISSION_TIMEOUT_SECONDS: 120 # Timeout for the python submission script itself
       # --- Define the validation function with enhanced debugging (FIXED QUOTING) ---
-      VALIDATE_TABLE_FUNC: |
-        validate_table() {
-          local table_name=$1
-          echo "DEBUG: ===== Starting validate_table for table: $table_name ====="
-          # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are inherited)
-          if [[ -z "$GCP_PROJECT_ID" || -z "$BQ_DATASET" ]]; then
-             echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the environment."
-             exit 1
-          fi
-
-          local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}"
-          local full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}"
-          local count=""
-          local exit_code=1
-          local retries=10
-          local delay=60 # Default seconds between retries
-
-          # Allow overriding delay via second argument (optional)
-          if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then
-              delay=$2
-              echo "DEBUG: Using custom retry delay: ${delay}s for table ${table_name}"
-          else
-              echo "DEBUG: Using default retry delay: ${delay}s for table ${table_name}"
-          fi
-          echo "DEBUG: Full table ID: ${full_table_id}, Max retries: ${retries}"
-
-          for i in $(seq 1 $retries); do
-            echo "DEBUG: Starting attempt $i/$retries..."
-            local query_output
-
-            echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID} --use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM \`${full_table_id}\`\""
-            query_output=$(bq query --project_id=${GCP_PROJECT_ID} \
-                             --use_legacy_sql=false \
-                             --format=sparse \
-                             --max_rows=1 \
-                             "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1)
-            exit_code=$?
-
-            echo "DEBUG: bq query exit code: $exit_code"
-            echo "DEBUG: bq query raw output: [$query_output]"
-
-            if [ $exit_code -eq 0 ]; then
-                echo "DEBUG: bq query exited successfully (code 0)."
-                count=$(echo "$query_output" | tail -n 1 | tr -d '[:space:]')
-                echo "DEBUG: Processed count after removing whitespace (from last line): [$count]"
-                if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
-                    echo "DEBUG: Count [$count] is a positive integer. Validation successful for this attempt."
-                    break # Success! Found non-zero rows
-                else
-                    echo "DEBUG: Count [$count] is zero or not a positive integer."
-                    if [[ "$count" == "0" ]]; then
-                       echo "DEBUG: Explicit count of 0 received."
-                    fi
-                fi
-            else
-                echo "DEBUG: bq query failed (exit code: $exit_code)."
-                echo "DEBUG: Checking table existence with bq show..."
-                if ! bq show --project_id=${GCP_PROJECT_ID} "${full_table_id_show}" > /dev/null 2>&1; then
-                  echo "DEBUG: Table ${full_table_id_show} appears not to exist (bq show failed)."
-                else
-                  echo "DEBUG: Table ${full_table_id_show} appears to exist (bq show succeeded), but query failed."
-                fi
-            fi
-
-            if [ $i -lt $retries ]; then
-              echo "DEBUG: Validation condition not met on attempt $i. Retrying in $delay seconds..."
-              sleep $delay
-            else
-              echo "DEBUG: Final attempt ($i) failed."
-            fi
-          done
-
-          echo "DEBUG: ===== Final validation check for table: $table_name ====="
-          if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
-            echo "SUCCESS: Table ${table_name} has ${count} rows. Final validation OK."
-            echo "DEBUG: validate_table returning 0 (success)."
-            return 0 # Indicate success
-          else
-            echo "ERROR: Failed to get a non-zero row count for table ${table_name} after $retries retries (Last exit code: $exit_code, Last processed count: '$count')."
-            echo "DEBUG: validate_table returning 1 (failure)."
-            return 1 # Indicate failure
-          fi
-        }
 
     steps:
       - name: Checkout code at RC tag
@@ -347,11 +264,10 @@ jobs:
       - name: Validate Leaderboard Results (Direct Runner)
         run: |
           source beam_env/bin/activate
-          eval "$VALIDATE_TABLE_FUNC"
           echo "Validating BigQuery results for Leaderboard (DirectRunner)..."
           sleep 90
-          validate_table "leader_board_users" || exit 1
-          validate_table "leader_board_teams" || exit 1
+          ./scripts/tools/validate_table.sh "leader_board_users" || exit 1
+          ./scripts/tools/validate_table.sh "leader_board_teams" || exit 1
           echo "Leaderboard (Direct Runner) BQ validation finished successfully."
         shell: bash
 
@@ -426,11 +342,10 @@ jobs:
              exit 0 # Exit step successfully to allow cancellation/cleanup
           fi
           source beam_env/bin/activate
-          eval "$VALIDATE_TABLE_FUNC"
           echo "Validating BigQuery results for Leaderboard (DataflowRunner)..."
           sleep 240
-          validate_table "leader_board_users" 15 || exit 1 # Use 15s retry delay
-          validate_table "leader_board_teams" 15 || exit 1 # Use 15s retry delay
+          ./scripts/tools/validate_table.sh "leader_board_users" 15 || exit 1 # Use 15s retry delay
+          ./scripts/tools/validate_table.sh "leader_board_teams" 15 || exit 1 # Use 15s retry delay
           echo "Leaderboard (Dataflow Runner) BQ validation finished successfully."
         shell: bash
 
@@ -451,7 +366,7 @@ jobs:
             echo "leaderboard_dataflow_jobid.txt not found, cannot cancel job (it might have failed before ID extraction)."
           fi
         shell: bash
-     
+
       # ================== GameStats Tests ==================
       - name: Run GameStats (Direct Runner) in Background
         run: |
@@ -471,12 +386,11 @@ jobs:
       - name: Validate GameStats Results (Direct Runner)
         run: |
           source beam_env/bin/activate
-          eval "$VALIDATE_TABLE_FUNC"
           echo "Validating BigQuery results for GameStats (DirectRunner)..."
           echo "* Sleeping for 25mins"
           sleep 25m
-          validate_table "game_stats_teams" || exit 1
-          validate_table "game_stats_sessions" || exit 1
+          ./scripts/tools/validate_table.sh "game_stats_teams" || exit 1
+          ./scripts/tools/validate_table.sh "game_stats_sessions" || exit 1
           echo "GameStats (Direct Runner) BQ validation finished successfully."
         shell: bash
 
@@ -552,12 +466,11 @@ jobs:
              exit 0 # Exit step successfully to allow cleanup
           fi
           source beam_env/bin/activate
-          eval "$VALIDATE_TABLE_FUNC"
           echo "Validating BigQuery results for GameStats (DataflowRunner)..."
           echo "* Sleeping for 25mins"
           sleep 25m
-          validate_table "game_stats_teams" 15 || exit 1 # Use 15s retry delay
-          validate_table "game_stats_sessions" 15 || exit 1 # Use 15s retry delay
+          ./scripts/tools/validate_table.sh "game_stats_teams" 15 || exit 1 # Use 15s retry delay
+          ./scripts/tools/validate_table.sh "game_stats_sessions" 15 || exit 1 # Use 15s retry delay
           echo "GameStats (Dataflow Runner) BQ validation finished successfully."
         shell: bash
 
diff --git a/scripts/tools/validate_table.sh b/scripts/tools/validate_table.sh
new file mode 100755
index 00000000000..2815893d556
--- /dev/null
+++ b/scripts/tools/validate_table.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Function to validate if a BigQuery table exists and has rows.
+# Usage: validate_table <table_name> [retry_delay_seconds]
+# Exits with 0 if validation succeeds, 1 otherwise.
+# Requires GCP_PROJECT_ID and BQ_DATASET to be set in the environment.
+
+validate_table() {
+  local table_name=$1
+  echo "DEBUG: ===== Starting validate_table for table: $table_name ====="
+  # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are inherited)
+  if [[ -z "$GCP_PROJECT_ID" || -z "$BQ_DATASET" ]]; then
+     echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the environment."
+     exit 1 # Exit script if env vars missing
+  fi
+
+  local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}"
+  local full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}"
+  local count=""
+  local exit_code=1
+  local retries=10
+  local delay=60 # Default seconds between retries
+
+  # Allow overriding delay via second argument (optional)
+  if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then
+      delay=$2
+      echo "DEBUG: Using custom retry delay: ${delay}s for table ${table_name}"
+  else
+      echo "DEBUG: Using default retry delay: ${delay}s for table ${table_name}"
+  fi
+  echo "DEBUG: Full table ID: ${full_table_id}, Max retries: ${retries}"
+
+  for i in $(seq 1 $retries); do
+    echo "DEBUG: Starting attempt $i/$retries..."
+    local query_output
+
+    echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID} --use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM \`${full_table_id}\`\""
+    query_output=$(bq query --project_id=${GCP_PROJECT_ID} \
+                     --use_legacy_sql=false \
+                     --format=sparse \
+                     --max_rows=1 \
+                     "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1)
+    exit_code=$?
+
+    echo "DEBUG: bq query exit code: $exit_code"
+    echo "DEBUG: bq query raw output: [$query_output]"
+
+    if [ $exit_code -eq 0 ]; then
+        echo "DEBUG: bq query exited successfully (code 0)."
+        count=$(echo "$query_output" | tail -n 1 | tr -d '[:space:]')
+        echo "DEBUG: Processed count after removing whitespace (from last line): [$count]"
+        if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
+            echo "DEBUG: Count [$count] is a positive integer. Validation successful for this attempt."
+            break # Success! Found non-zero rows
+        else
+            echo "DEBUG: Count [$count] is zero or not a positive integer."
+            if [[ "$count" == "0" ]]; then
+               echo "DEBUG: Explicit count of 0 received."
+            fi
+        fi
+    else
+        echo "DEBUG: bq query failed (exit code: $exit_code)."
+        echo "DEBUG: Checking table existence with bq show..."
+        if ! bq show --project_id=${GCP_PROJECT_ID} "${full_table_id_show}" > /dev/null 2>&1; then
+          echo "DEBUG: Table ${full_table_id_show} appears not to exist (bq show failed)."
+        else
+          echo "DEBUG: Table ${full_table_id_show} appears to exist (bq show succeeded), but query failed."
+        fi
+    fi
+
+    if [ $i -lt $retries ]; then
+      echo "DEBUG: Validation condition not met on attempt $i. Retrying in $delay seconds..."
+      sleep $delay
+    else
+      echo "DEBUG: Final attempt ($i) failed."
+    fi
+  done
+
+echo "DEBUG: ===== Final validation check for table: $table_name ====="
+  if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
+    echo "SUCCESS: Table ${table_name} has ${count} rows. Final validation OK."
+    echo "DEBUG: validate_table returning 0 (success)."
+    return 0 # Indicate success
+  else
+    echo "ERROR: Failed to get a non-zero row count for table ${table_name} after $retries retries (Last exit code: $exit_code, Last processed count: '$count')."
+    echo "DEBUG: validate_table returning 1 (failure)."
+    return 1 # Indicate failure
+  fi
+}
+
+# Allow the script to be sourced using "source ./script.sh"
+# and then call the function directly: "validate_table my_table 30"
+# If the script is executed directly, check if arguments are provided and call the function.
+if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+  if [[ $# -eq 0 ]]; then
+    echo "Usage: $0 <table_name> [retry_delay_seconds]"
+    echo "Requires GCP_PROJECT_ID and BQ_DATASET env vars."
+    exit 1
+  fi
+  validate_table "$@"
+fi

Reply via email to