damccorm commented on code in PR #34473:
URL: https://github.com/apache/beam/pull/34473#discussion_r2021032460


##########
.github/workflows/run_rc_validation_python_mobile_gaming.yml:
##########
@@ -53,44 +58,124 @@ permissions:
   security-events: read
   statuses: read
 
-env:
-  DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }}
-  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
-  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
-  # Define unique names for resources based on run ID to avoid collisions
-  RUN_ID_SUFFIX: ${{ github.run_id }}_${{ github.run_attempt }}
-  BQ_DATASET: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt }}
-  PUBSUB_TOPIC: mobilegaming_py_rc_${{ github.run_id }}_${{ github.run_attempt 
}}
-  # Set GCP Project ID, Bucket, Region as constants
+env: # Workflow level env vars if needed, specific ones are below
   GCP_PROJECT_ID: 'apache-beam-testing'
-  GCS_BUCKET: 'gs://rc-validation-migration-tests' # Includes gs:// prefix
-  GCE_REGION: 'us-central1'
-  # Java Injector specific envs
-  APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }}
-  # Release specific envs
-  RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }}
-  RC_NUM: ${{ github.event.inputs.RC_NUM }}
-  RC_TAG: 
"v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}"
-  # Python specific envs
-  PYTHON_VERSION: '3.9' # Specify desired Python version
-  BEAM_PYTHON_SDK_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER 
}}.tar.gz
-  BEAM_SOURCE_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER 
}}-source-release.zip
-  APACHE_DIST_URL_BASE: https://dist.apache.org/repos/dist/dev/beam/${{ 
github.event.inputs.RELEASE_VER }}
-  # Default duration for GameStats fixed window
-  GAME_STATS_WINDOW_DURATION: 60
+  GCS_BUCKET: 'gs://rc-validation-migration-tests'
 
 jobs:
   run_python_mobile_gaming_rc_validation:
     name: Run Python Mobile Gaming RC Validation (${{ 
github.event.inputs.RELEASE_VER }} RC${{ github.event.inputs.RC_NUM }})
-    runs-on: [self-hosted, ubuntu-20.04, main] # Assuming same runner type 
needed
-    timeout-minutes: 180 # Increased timeout for Python steps + Java injector
+    runs-on: [self-hosted, ubuntu-20.04, main]
+    timeout-minutes: 360
+    env: # Job-level env vars inherit workflow level and define job-specific 
ones
+      DEVELOCITY_ACCESS_KEY: ${{ secrets.DEVELOCITY_ACCESS_KEY }}
+      GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
+      GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
+      RUN_ID_SUFFIX: ${{ github.run_id }}_${{ github.run_attempt }}
+      BQ_DATASET: mobilegaming_py_rc_${{ github.run_id }}_${{ 
github.run_attempt }}
+      PUBSUB_TOPIC: mobilegaming_py_rc_${{ github.run_id }}_${{ 
github.run_attempt }}
+      GCE_REGION: 'us-central1'
+      APACHE_REPO_URL: ${{ github.event.inputs.APACHE_CONTENTS_REPO }}
+      RELEASE_VERSION: ${{ github.event.inputs.RELEASE_VER }}
+      RC_NUM: ${{ github.event.inputs.RC_NUM }}
+      RC_TAG: 
"v${{github.event.inputs.RELEASE_VER}}-RC${{github.event.inputs.RC_NUM}}"
+      PYTHON_VERSION: '3.9'
+      BEAM_PYTHON_SDK_TAR_GZ: apache_beam-${{ github.event.inputs.RELEASE_VER 
}}.tar.gz
+      BEAM_SOURCE_ZIP: apache-beam-${{ github.event.inputs.RELEASE_VER 
}}-source-release.zip
+      APACHE_DIST_URL_BASE: https://dist.apache.org/repos/dist/dev/beam/${{ 
github.event.inputs.RELEASE_VER }}
+      GAME_STATS_WINDOW_DURATION: 20
+      SUBMISSION_TIMEOUT_SECONDS: 120 # Timeout for the python submission 
script itself
+      # --- Define the validation function with enhanced debugging (FIXED 
QUOTING) ---
+      VALIDATE_TABLE_FUNC: |
+        validate_table() {
+          local table_name=$1
+          echo "DEBUG: ===== Starting validate_table for table: $table_name 
====="
+          # Ensure required env vars are set (GCP_PROJECT_ID, BQ_DATASET are 
inherited)
+          if [[ -z "$GCP_PROJECT_ID" || -z "$BQ_DATASET" ]]; then
+             echo "ERROR: GCP_PROJECT_ID and BQ_DATASET must be set in the 
environment."
+             exit 1
+          fi
+
+          local full_table_id="${GCP_PROJECT_ID}.${BQ_DATASET}.${table_name}"
+          local 
full_table_id_show="${GCP_PROJECT_ID}:${BQ_DATASET}.${table_name}"
+          local count=""
+          local exit_code=1
+          local retries=10
+          local delay=60 # Default seconds between retries
+
+          # Allow overriding delay via second argument (optional)
+          if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then
+              delay=$2
+              echo "DEBUG: Using custom retry delay: ${delay}s for table 
${table_name}"
+          else
+              echo "DEBUG: Using default retry delay: ${delay}s for table 
${table_name}"
+          fi
+          echo "DEBUG: Full table ID: ${full_table_id}, Max retries: 
${retries}"
+
+          for i in $(seq 1 $retries); do
+            echo "DEBUG: Starting attempt $i/$retries..."
+            local query_output
+
+            echo "DEBUG: Executing: bq query --project_id=${GCP_PROJECT_ID} 
--use_legacy_sql=false --format=sparse --max_rows=1 \"SELECT COUNT(*) FROM 
\`${full_table_id}\`\""
+            query_output=$(bq query --project_id=${GCP_PROJECT_ID} \
+                             --use_legacy_sql=false \
+                             --format=sparse \
+                             --max_rows=1 \
+                             "SELECT COUNT(*) FROM \`${full_table_id}\`" 2>&1)
+            exit_code=$?
+
+            echo "DEBUG: bq query exit code: $exit_code"
+            echo "DEBUG: bq query raw output: [$query_output]"
+
+            if [ $exit_code -eq 0 ]; then
+                echo "DEBUG: bq query exited successfully (code 0)."
+                count=$(echo "$query_output" | tail -n 1 | tr -d '[:space:]')
+                echo "DEBUG: Processed count after removing whitespace (from 
last line): [$count]"
+                if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
+                    echo "DEBUG: Count [$count] is a positive integer. 
Validation successful for this attempt."
+                    break # Success! Found non-zero rows
+                else
+                    echo "DEBUG: Count [$count] is zero or not a positive 
integer."
+                    if [[ "$count" == "0" ]]; then
+                       echo "DEBUG: Explicit count of 0 received."
+                    fi
+                fi
+            else
+                echo "DEBUG: bq query failed (exit code: $exit_code)."
+                echo "DEBUG: Checking table existence with bq show..."
+                if ! bq show --project_id=${GCP_PROJECT_ID} 
"${full_table_id_show}" > /dev/null 2>&1; then
+                  echo "DEBUG: Table ${full_table_id_show} appears not to 
exist (bq show failed)."
+                else
+                  echo "DEBUG: Table ${full_table_id_show} appears to exist 
(bq show succeeded), but query failed."
+                fi
+            fi
+
+            if [ $i -lt $retries ]; then
+              echo "DEBUG: Validation condition not met on attempt $i. 
Retrying in $delay seconds..."
+              sleep $delay
+            else
+              echo "DEBUG: Final attempt ($i) failed."
+            fi
+          done
+
+          echo "DEBUG: ===== Final validation check for table: $table_name 
====="
+          if [[ "$count" =~ ^[0-9]+$ ]] && [ "$count" -gt 0 ]; then
+            echo "SUCCESS: Table ${table_name} has ${count} rows. Final 
validation OK."
+            echo "DEBUG: validate_table returning 0 (success)."
+            return 0 # Indicate success
+          else
+            echo "ERROR: Failed to get a non-zero row count for table 
${table_name} after $retries retries (Last exit code: $exit_code, Last 
processed count: '$count')."
+            echo "DEBUG: validate_table returning 1 (failure)."
+            return 1 # Indicate failure
+          fi
+        }

Review Comment:
   We're at the point where this is now a lot of inlined bash in the workflow 
file (much more so than the previous version of this workflow). This is a bit 
of an antipattern for 2 reasons:
   
   1) It is hard to run this yourself locally - in general, github actions 
workflows should be easy to reproduce locally for debugging purposes. Doing 
that requires a lot of error prone copy pasting which will make iterating on 
changes hard.
   2) Managing large bash scripts within the scope of a yaml file means we lose 
the benefits of normal local IDE development (e.g. if people have vscode 
plugins for bash, they won't work on yaml)
   3) Sharing infra across tests is hard.
   
   If we need this much bash to get this working, I'd suggest we put most of 
the logic in one or more bash files and just invoke them directly.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@beam.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to