Abacn commented on code in PR #29226:
URL: https://github.com/apache/beam/pull/29226#discussion_r1378125759


##########
sdks/python/apache_beam/testing/analyzers/load_test_perf_analysis.py:
##########
@@ -0,0 +1,64 @@
+import argparse
+import logging
+from apache_beam.testing.analyzers import constants
+from apache_beam.testing.analyzers import perf_analysis_utils
+from apache_beam.testing.analyzers import perf_analysis
+from apache_beam.testing.analyzers.perf_analysis_utils import MetricContainer, 
TestConfigContainer
+from google.cloud import bigquery
+
+
class LoadTestMetricsFetcher(perf_analysis_utils.MetricsFetcher):
  """Fetches load-test metric time series from BigQuery.

  Used by the change-point (performance regression) analysis tooling.
  NOTE(review): intended as internal tooling for the perf analyzers, not a
  public API — confirm and document accordingly.
  """

  def fetch_metric_data(
      self, *, test_config: TestConfigContainer) -> MetricContainer:
    """Query BigQuery for the most recent values of one metric of one test.

    Args:
      test_config: config whose ``test_name`` holds a comma-separated
        ``"<test_name>,<pipeline_name>"`` pair, plus the BigQuery
        project/dataset/table and the metric name to fetch.

    Returns:
      A MetricContainer with the fetched metric values and timestamps,
      ordered newest first; both lists are empty if the query matched
      no rows.
    """
    # test_name is expected to be "<test_name>,<pipeline_name>"; raises
    # ValueError if the comma is missing (mis-configured entry).
    test_name, pipeline_name = test_config.test_name.split(',')
    # NOTE(review): config values are interpolated directly into the SQL.
    # They come from a trusted config file, but BigQuery query parameters
    # would be safer if this ever takes external input.
    query = f"""
      SELECT timestamp, metric.value
      FROM {test_config.project}.{test_config.metrics_dataset}.{test_config.metrics_table}
      CROSS JOIN UNNEST(metrics) AS metric
      WHERE test_name = "{test_name}" AND pipeline_name = "{pipeline_name}" AND metric.name = "{test_config.metric_name}"
      ORDER BY timestamp DESC
      LIMIT {constants._NUM_DATA_POINTS_TO_RUN_CHANGE_POINT_ANALYSIS}
    """
    # Lazy %-style args: the query string is only formatted when DEBUG
    # logging is actually enabled.
    logging.debug("Running query: %s", query)
    client = bigquery.Client()
    query_job = client.query(query=query)
    metric_data = query_job.result().to_dataframe()
    if metric_data.empty:
      logging.error(
          "No results returned from BigQuery. Please check the query.")
    return MetricContainer(
        values=metric_data['value'].tolist(),
        timestamps=metric_data['timestamp'].tolist(),
    )
+
+
if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  load_test_metrics_fetcher = LoadTestMetricsFetcher()

  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--config_file_path',
      required=True,
      type=str,
      help='Path to the config file that contains data to run the Change '
           'Point Analysis. The default file used will be '
           'apache_beam/testing/analyzers/tests.config.yml. '
           'If you would like to use the Change Point Analysis for finding '
           'performance regressions in the tests, '
           'please provide a .yml file with the same structure as the '
           'above-mentioned file.')
  parser.add_argument(
      '--save_alert_metadata',
      action='store_true',
      default=False,
      help='Save perf alert / GH issue metadata to the BigQuery table.')
  known_args, unknown_args = parser.parse_known_args()

  if unknown_args:
    # Lazy %-style args: only formatted if the warning is actually emitted.
    logging.warning('Discarding unknown arguments : %s', unknown_args)

  perf_analysis.run(
      big_query_metrics_fetcher=load_test_metrics_fetcher,
      config_file_path=known_args.config_file_path,
      # Set this to true while running in production.
      save_alert_metadata=known_args.save_alert_metadata)

Review Comment:
   Seems there is no need for the "line-too-long" pylint disable here — the line is not over the limit.



##########
sdks/python/apache_beam/testing/analyzers/load_test_perf_analysis.py:
##########
@@ -0,0 +1,64 @@
+import argparse
+import logging
+from apache_beam.testing.analyzers import constants
+from apache_beam.testing.analyzers import perf_analysis_utils
+from apache_beam.testing.analyzers import perf_analysis
+from apache_beam.testing.analyzers.perf_analysis_utils import MetricContainer, 
TestConfigContainer
+from google.cloud import bigquery
+
+
+class LoadTestMetricsFetcher(perf_analysis_utils.MetricsFetcher):

Review Comment:
   A brief docstring would be appreciated. E.g., if this is considered internal, 
then state explicitly that it is internal.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to