liferoad commented on code in PR #34473:
URL: https://github.com/apache/beam/pull/34473#discussion_r2021147485


##########
.github/workflows/run_rc_validation_python_mobile_gaming.yml:
##########
@@ -53,44 +58,124 @@ permissions:
   security-events: read
   statuses: read
 
-env:
-  DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }}
-  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
-  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
-  # Define unique names for resources based on run ID to avoid collisions
-  RUN_ID_SUFFIX: ${{ github.run_id }}_${{ github.run_attempt }}
-  BQ_DATASET: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt }}
-  PUBSUB_TOPIC: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt 
}}
-  # Set GCP Project ID, Bucket, Region as constants
+env: # Workflow level env vars if needed, specific ones are below
   GCP_PROJECT_ID: 'apache-beam-testing'
-  GCS_BUCKET: 'gs://rc-validation-migration-tests' # Includes gs:// prefix
-  GCE_REGION: 'us-central1'
-  # Java Injector specific envs
-  APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }}
-  # Release specific envs
-  RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }}
-  RC_NUM: ${{ github.event.inputs.RC_NUM }}
-  RC_TAG: 
"v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}"
-  # Python specific envs
-  PYTHON_VERSION: '3.9' # Specify desired Python version
-  BEAM_PYTHON_SDK_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER 
}}.tar.gz
-  BEAM_SOURCE_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER 
}}-source-release.zip
-  APACHE_DIST_URL_BASE: https://dist.apache.org/repos/dist/dev/beam/${{ 
github.event.inputs.RELEASE_VER }}
-  # Default duration for GameStats fixed window
-  GAME_STATS_WINDOW_DURATION: 60
+  GCS_BUCKET: 'gs://rc-validation-migration-tests'
 
 jobs:
   run_python_mobile_gaming_rc_validation:
     name: Run Python Mobile Gaming RC Validation (${{ 
github.event.inputs.RELEASE_VER }} RC${{ github.event.inputs.RC_NUM }})
-    runs-on: [self-hosted, ubuntu-20.04, main] # Assuming same runner type 
needed
-    timeout-minutes: 180 # Increased timeout for Python steps + Java injector
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 360
+    env: # Job-level env vars inherit workflow level and define job-specific 
ones
+      DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }}
+      GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+      GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+      RUN_ID_SUFFIX: ${{ github.run_id }}_${{ github.run_attempt }}
+      BQ_DATASET: mobilegaming_py_rc_${{ github.run_id }}_${{ 
github.run_attempt }}
+      PUBSUB_TOPIC: mobilegaming_py_rc_${{ github.run_id }}_${{ 
github.run_attempt }}
+      GCE_REGION: 'us-central1'
+      APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }}
+      RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }}
+      RC_NUM: ${{ github.event.inputs.RC_NUM }}
+      RC_TAG: 
"v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}"
+      PYTHON_VERSION: '3.9'
+      BEAM_PYTHON_SDK_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER 
}}.tar.gz
+      BEAM_SOURCE_ZIP: apache-beam-${{ github.event.inputs.RELEASE_VER 
}}-source-release.zip
+      APACHE_DIST_URL_BASE: https://dist.apache.org/repos/dist/dev/beam/${{ 
github.event.inputs.RELEASE_VER }}
+      GAME_STATS_WINDOW_DURATION: 20
+      SUBMISSION_TIMEOUT_SECONDS: 120 # Timeout for the python submission 
script itself
+      # --- Define the validation function with enhanced debugging (FIXED 
QUOTING) ---
+      VALIDATE_TABLE_FUNC: |
+        validate_table() {
+          local table_name=$1
+          echo "DEBUG: ===== Starting validate_table for table: $table_name 
====="
+          # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are 
inherited)
+          if [[ -z "$GCP_PROJECT_ID" || -z "$BQ_DATASET" ]]; then
+             echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the 
environment."
+             exit 1
+          fi
+
+          local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}"
+          local 
full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}"
+          local count=""
+          local exit_code=1
+          local retries=10
+          local delay=60 # Default seconds between retries
+
+          # Allow overriding delay via second argument (optional)
+          if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then
+              delay=$2
+              echo "DEBUG: Using custom retry delay: ${delay}s for table 
${table_name}"
+          else
+              echo "DEBUG: Using default retry delay: ${delay}s for table 
${table_name}"
+          fi
+          echo "DEBUG: Full table ID: ${full_table_id}, Max retries: 
${retries}"
+
+          for i in $(seq 1 $retries); do
+            echo "DEBUG: Starting attempt $i/$retries..."
+            local query_output
+
+            echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID} 
--use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM 
\`${full_table_id}\`\""
+            query_output=$(bq query --project_id=${GCP_PROJECT_ID} \
+                             --use_legacy_sql=false \
+                             --format=sparse \
+                             --max_rows=1 \
+                             "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1)
+            exit_code=$?
+
+            echo "DEBUG: bq query exit code: $exit_code"
+            echo "DEBUG: bq query raw output: [$query_output]"
+
+            if [ $exit_code -eq 0 ]; then
+                echo "DEBUG: bq query exited successfully (code 0)."
+                count=$(echo "$query_output" | tail -n 1 | tr -d '[:space:]')
+                echo "DEBUG: Processed count after removing whitespace (from 
last line): [$count]"
+                if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
+                    echo "DEBUG: Count [$count] is a positive integer. 
Validation successful for this attempt."
+                    break # Success! Found non-zero rows
+                else
+                    echo "DEBUG: Count [$count] is zero or not a positive 
integer."
+                    if [[ "$count" == "0" ]]; then
+                       echo "DEBUG: Explicit count of 0 received."
+                    fi
+                fi
+            else
+                echo "DEBUG: bq query failed (exit code: $exit_code)."
+                echo "DEBUG: Checking table existence with bq show..."
+                if ! bq show --project_id=${GCP_PROJECT_ID} 
"${full_table_id_show}" > /dev/null 2>&1; then
+                  echo "DEBUG: Table ${full_table_id_show} appears not to 
exist (bq show failed)."
+                else
+                  echo "DEBUG: Table ${full_table_id_show} appears to exist 
(bq show succeeded), but query failed."
+                fi
+            fi
+
+            if [ $i -lt $retries ]; then
+              echo "DEBUG: Validation condition not met on attempt $i. 
Retrying in $delay seconds..."
+              sleep $delay
+            else
+              echo "DEBUG: Final attempt ($i) failed."
+            fi
+          done
+
+          echo "DEBUG: ===== Final validation check for table: $table_name 
====="
+          if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
+            echo "SUCCESS: Table ${table_name} has ${count} rows. Final 
validation OK."
+            echo "DEBUG: validate_table returning 0 (success)."
+            return 0 # Indicate success
+          else
+            echo "ERROR: Failed to get a non-zero row count for table 
${table_name} after $retries retries (Last exit code: $exit_code, Last 
processed count: '$count')."
+            echo "DEBUG: validate_table returning 1 (failure)."
+            return 1 # Indicate failure
+          fi
+        }

Review Comment:
   I think we should merge this PR first and later I can create a new PR to 
move this to a new script.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@beam.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to