damccorm commented on code in PR #24073: URL: https://github.com/apache/beam/pull/24073#discussion_r1055604510
########## .test-infra/metrics/sync/github/sync_workflows.py: ########## @@ -0,0 +1,202 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +''' +This module queries GitHub to collect Beam-related workflows metrics and put them in +PostgreSQL. +This Script is running every 3 hours in a cloud function in apache-beam-testing project. +This cloud function is triggered by a pubsub topic. +You can find the cloud function in the next link +https://console.cloud.google.com/functions/details/us-central1/github_actions_workflows_dashboard_sync?env=gen1&project=apache-beam-testing +Pub sub topic : https://console.cloud.google.com/cloudpubsub/topic/detail/github_actions_workflows_sync?project=apache-beam-testing +Cron Job : https://console.cloud.google.com/cloudscheduler/jobs/edit/us-central1/github_actions_workflows_dashboard_sync?project=apache-beam-testing +Writing the latest 10 runs of every postcommit workflow in master branch in a beammetrics database +''' + +import os +import sys +import time +import re +import requests +import psycopg2 + +from datetime import datetime +from github import GithubIntegration + +DB_HOST = os.environ['DB_HOST'] +DB_PORT = os.environ['DB_PORT'] +DB_NAME = os.environ['DB_NAME'] +DB_USER_NAME = os.environ['DB_USER'] +DB_PASSWORD = os.environ['DB_PASS'] +GH_WORKFLOWS_TABLE_NAME = "github_workflows" +# Number of workflows that fetch github API +GH_NUMBER_OF_WORKFLOWS = 100 +GH_WORKFLOWS_NUMBER_EXECUTIONS = 10 +WORKFLOWS_OBJECT_LIST = [] + +class Workflow: + def __init__(self,id,name,filename): + self.id = id + self.name = name + 
self.filename = filename + self.listOfRuns = [] + self.runUrl = [] + +# The table will save the latest ten run of every workflow +GH_WORKFLOWS_CREATE_TABLE_QUERY = f""" +CREATE TABLE IF NOT EXISTS {GH_WORKFLOWS_TABLE_NAME} ( + job_name text PRIMARY KEY, + job_yml_filename text, + run1 text, + run1Id text, + run2 text, + run2Id text, + run3 text, + run3Id text, + run4 text, + run4Id text, + run5 text, + run5Id text, + run6 text, + run6Id text, + run7 text, + run7Id text, + run8 text, + run8Id text, + run9 text, + run9Id text, + run10 text, + run10Id text Review Comment: Could we construct this string programmatically - something like: ``` GH_WORKFLOWS_CREATE_TABLE_QUERY = f""" CREATE TABLE IF NOT EXISTS {GH_WORKFLOWS_TABLE_NAME} ( job_name text PRIMARY KEY, job_yml_filename text, """ for i in range(0,GH_WORKFLOWS_NUMBER_EXECUTIONS): GH_WORKFLOWS_CREATE_TABLE_QUERY += " run{} text,\n run{}Id text,\n".format(i, i) GH_WORKFLOWS_CREATE_TABLE_QUERY += ")\n" ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
