liferoad commented on code in PR #34473: URL: https://github.com/apache/beam/pull/34473#discussion_r2021049215
########## .github/workflows/run_rc_validation_python_mobile_gaming.yml: ########## @@ -53,44 +58,124 @@ permissions: security-events: read statuses: read -env: - DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} - GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} - GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} - # Define unique names for resources based on run ID to avoid collisions - RUN_ID_SUFFIX: ${{ github.run_id }}_${{ github.run_attempt }} - BQ_DATASET: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt }} - PUBSUB_TOPIC: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt }} - # Set GCP Project ID, Bucket, Region as constants +env: # Workflow level env vars if needed, specific ones are below GCP_PROJECT_ID: 'apache-beam-testing' - GCS_BUCKET: 'gs://rc-validation-migration-tests' # Includes gs:// prefix - GCE_REGION: 'us-central1' - # Java Injector specific envs - APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }} - # Release specific envs - RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }} - RC_NUM: ${{ github.event.inputs.RC_NUM }} - RC_TAG: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}" - # Python specific envs - PYTHON_VERSION: '3.9' # Specify desired Python version - BEAM_PYTHON_SDK_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER }}.tar.gz - BEAM_SOURCE_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER }}-source-release.zip - APACHE_DIST_URL_BASE: https://dist.apache.org/repos/dist/dev/beam/${{ github.event.inputs.RELEASE_VER }} - # Default duration for GameStats fixed window - GAME_STATS_WINDOW_DURATION: 60 + GCS_BUCKET: 'gs://rc-validation-migration-tests' jobs: run_python_mobile_gaming_rc_validation: name: Run Python Mobile Gaming RC Validation (${{ github.event.inputs.RELEASE_VER }} RC${{ github.event.inputs.RC_NUM }}) - runs-on: [self-hosted, ubuntu-20.04, main] # Assuming same runner type needed - timeout-minutes: 180 # Increased timeout for Python steps + Java injector + runs-on: [self-hosted, ubuntu-20.04, main] + timeout-minutes: 360 + env: # Job-level env vars inherit workflow level and define job-specific ones + DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }} + GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }} + GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }} + RUN_ID_SUFFIX: ${{ github.run_id }}_${{ github.run_attempt }} + BQ_DATASET: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt }} + PUBSUB_TOPIC: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt }} + GCE_REGION: 'us-central1' + APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }} + RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }} + RC_NUM: ${{ github.event.inputs.RC_NUM }} + RC_TAG: "v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}" + PYTHON_VERSION: '3.9' + BEAM_PYTHON_SDK_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER }}.tar.gz + BEAM_SOURCE_ZIP: apache-beam-${{ github.event.inputs.RELEASE_VER }}-source-release.zip + APACHE_DIST_URL_BASE: https://dist.apache.org/repos/dist/dev/beam/${{ github.event.inputs.RELEASE_VER }} + GAME_STATS_WINDOW_DURATION: 20 + SUBMISSION_TIMEOUT_SECONDS: 120 # Timeout for the python submission script itself + # --- Define the validation function with enhanced debugging (FIXED QUOTING) --- + VALIDATE_TABLE_FUNC: | + validate_table() { + local table_name=$1 + echo "DEBUG: ===== Starting validate_table for table: $table_name =====" + # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are inherited) + if [[ -z "$GCP_PROJECT_ID" || -z "$BQ_DATASET" ]]; then + echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the environment." + exit 1 + fi + + local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}" + local full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}" + local count="" + local exit_code=1 + local retries=10 + local delay=60 # Default seconds between retries + + # Allow overriding delay via second argument (optional) + if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then + delay=$2 + echo "DEBUG: Using custom retry delay: ${delay}s for table ${table_name}" + else + echo "DEBUG: Using default retry delay: ${delay}s for table ${table_name}" + fi + echo "DEBUG: Full table ID: ${full_table_id}, Max retries: ${retries}" + + for i in $(seq 1 $retries); do + echo "DEBUG: Starting attempt $i/$retries..." + local query_output + + echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID} --use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM \`${full_table_id}\`\"" + query_output=$(bq query --project_id=${GCP_PROJECT_ID} \ + --use_legacy_sql=false \ + --format=sparse \ + --max_rows=1 \ + "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1) + exit_code=$? + + echo "DEBUG: bq query exit code: $exit_code" + echo "DEBUG: bq query raw output: [$query_output]" + + if [ $exit_code -eq 0 ]; then + echo "DEBUG: bq query exited successfully (code 0)." + count=$(echo "$query_output" | tail -n 1 | tr -d '[:space:]') + echo "DEBUG: Processed count after removing whitespace (from last line): [$count]" + if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then + echo "DEBUG: Count [$count] is a positive integer. Validation successful for this attempt." + break # Success! Found non-zero rows + else + echo "DEBUG: Count [$count] is zero or not a positive integer." + if [[ "$count" == "0" ]]; then + echo "DEBUG: Explicit count of 0 received." + fi + fi + else + echo "DEBUG: bq query failed (exit code: $exit_code)." + echo "DEBUG: Checking table existence with bq show..." + if ! bq show --project_id=${GCP_PROJECT_ID} "${full_table_id_show}" > /dev/null 2>&1; then + echo "DEBUG: Table ${full_table_id_show} appears not to exist (bq show failed)." + else + echo "DEBUG: Table ${full_table_id_show} appears to exist (bq show succeeded), but query failed." + fi + fi + + if [ $i -lt $retries ]; then + echo "DEBUG: Validation condition not met on attempt $i. Retrying in $delay seconds..." + sleep $delay + else + echo "DEBUG: Final attempt ($i) failed." + fi + done + + echo "DEBUG: ===== Final validation check for table: $table_name =====" + if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then + echo "SUCCESS: Table ${table_name} has ${count} rows. Final validation OK." + echo "DEBUG: validate_table returning 0 (success)." + return 0 # Indicate success + else + echo "ERROR: Failed to get a non-zero row count for table ${table_name} after $retries retries (Last exit code: $exit_code, Last processed count: '$count')." + echo "DEBUG: validate_table returning 1 (failure)." + return 1 # Indicate failure + fi + } Review Comment: I intend to keep this self-contained. So we can easily fix this within this workflow file. I think we should merge first and keep improving this if this need more frequent changes which I doubt. I am also not sure if we can test the change in another bash file. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@beam.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org