This is an automated email from the ASF dual-hosted git repository.
tvalentyn pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 6f82f34eeb1 Onboard Performance tests using the Change point analysis tool. (#25754)
6f82f34eeb1 is described below
commit 6f82f34eeb14097810f3e8faee10221ac1c54aa3
Author: Anand Inguva <[email protected]>
AuthorDate: Fri Mar 17 14:49:07 2023 -0400
Onboard Performance tests using the Change point analysis tool. (#25754)
---
.github/workflows/run_perf_alert_tool.yml | 24 +++++----
.../python/apache_beam/testing/analyzers/README.md | 9 ++--
.../apache_beam/testing/analyzers/constants.py | 2 +-
.../testing/analyzers/github_issues_utils.py | 3 +-
.../apache_beam/testing/analyzers/perf_analysis.py | 17 ++++---
.../testing/analyzers/perf_analysis_utils.py | 20 ++++----
.../testing/analyzers/tests_config.yaml | 59 ++++++++++++++++++----
.../benchmarks/cloudml/cloudml_benchmark_test.py | 24 ---------
8 files changed, 93 insertions(+), 65 deletions(-)
diff --git a/.github/workflows/run_perf_alert_tool.yml b/.github/workflows/run_perf_alert_tool.yml
index 30779e765f1..65e89ca5f5f 100644
--- a/.github/workflows/run_perf_alert_tool.yml
+++ b/.github/workflows/run_perf_alert_tool.yml
@@ -17,12 +17,12 @@
# To learn more about GitHub Actions in Apache Beam check the CI.md
-name: Run performance alerting tool on Python load/performance/benchmark tests.
+name: Performance alerting tool on Python load/performance/benchmark tests.
on:
+ workflow_dispatch:
schedule:
- cron: '5 22 * * *'
-
jobs:
python_run_change_point_analysis:
name: Run Change Point Analysis.
@@ -34,6 +34,11 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: 3.8
+ - name: Authenticate on GCP
+ uses: google-github-actions/setup-gcloud@v0
+ with:
+ service_account_key: ${{ secrets.GCP_SA_KEY }}
+ export_default_credentials: true
- name: Get Apache Beam Build dependencies
working-directory: ./sdks/python
run: pip install pip setuptools --upgrade && pip install -r build-requirements.txt
@@ -43,17 +48,16 @@ jobs:
- name: Install signal-processing-algorithms
run: pip install signal-processing-algorithms
- name: Install pandas, yaml, requests
- run: pip install pandas PyYAML requests
-# - name: Run Change Point Analysis.
-# working-directory: ./sdks/python/apache_beam/testing/analyzers
-# shell: bash
-# run: python analysis.py
-# env:
-# GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+ run: pip install pandas db-dtypes PyYAML requests
+ - name: Run Change Point Analysis.
+ working-directory: ./sdks/python/apache_beam/testing/analyzers
+ shell: bash
+ run: python perf_analysis.py
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Run change point analysis tests.
working-directory: ./sdks/python/apache_beam/testing/analyzers
shell: bash
run: pytest perf_analysis_test.py
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-
diff --git a/sdks/python/apache_beam/testing/analyzers/README.md b/sdks/python/apache_beam/testing/analyzers/README.md
index 9f50d9797b5..71351fe3e57 100644
--- a/sdks/python/apache_beam/testing/analyzers/README.md
+++ b/sdks/python/apache_beam/testing/analyzers/README.md
@@ -41,7 +41,8 @@ please follow the below structure.
```
# the test_1 must be a unique id.
test_1:
- test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+ test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152
+ test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
source: big_query
metrics_dataset: beam_run_inference
metrics_table: torch_inference_imagenet_results_resnet152
@@ -53,9 +54,9 @@ test_1:
num_runs_in_change_point_window: 30 # optional parameter
```
-**NOTE**: `test_name` should be in the format `apache_beam.foo.bar`. It should point to a single test target.
+**NOTE**: `test_target` is optional. It is used to identify the test that caused the regression.
-**Note**: If the source is **BigQuery**, the metrics_dataset, metrics_table, project and metric_name should match with the values defined for performance/load tests.
+**Note**: If the source is **BigQuery**, the `metrics_dataset`, `metrics_table`, `project` and `metric_name` should match the values defined for the performance/load tests.
The above example uses this [test configuration](https://github.com/apache/beam/blob/0a91d139dea4276dc46176c4cdcdfce210fc50c4/.test-infra/jenkins/job_InferenceBenchmarkTests_Python.groovy#L30) to fill up the values required to fetch the data from source.
@@ -83,7 +84,7 @@ All the performance/load tests metrics defined at [beam/.test-infra/jenkins](htt
find the alerted test dashboard to find a spike in the metric values.
For example, for the below configuration,
-* test: `apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks`
+* test_target: `apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks`
* metric_name: `mean_load_model_latency_milli_secs`
Grafana dashboard can be found at http://104.154.241.245/d/ZpS8Uf44z/python-ml-runinference-benchmarks?orgId=1&viewPanel=7
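
For readers following the README change above, here is a minimal sketch of loading and sanity-checking such a config entry. The required-key set mirrors `_PERF_TEST_KEYS` from constants.py in this commit; the `load_test_config` helper itself is illustrative and not part of the change.

```python
# Illustrative sketch only, not part of this commit: load the analyzers'
# YAML test config and check that each entry has the keys the tool expects.
import yaml

_PERF_TEST_KEYS = {
    'test_name', 'metrics_dataset', 'metrics_table', 'project', 'metric_name'
}


def load_test_config(path='tests_config.yaml'):
  with open(path) as f:
    config = yaml.safe_load(f)
  for test_id, params in config.items():
    missing = _PERF_TEST_KEYS - set(params)
    if missing:
      raise ValueError(f'{test_id} is missing required keys: {missing}')
  return config
```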
diff --git a/sdks/python/apache_beam/testing/analyzers/constants.py b/sdks/python/apache_beam/testing/analyzers/constants.py
index c4bdded77a0..c0df05f61db 100644
--- a/sdks/python/apache_beam/testing/analyzers/constants.py
+++ b/sdks/python/apache_beam/testing/analyzers/constants.py
@@ -34,7 +34,7 @@ _NUM_RESULTS_TO_DISPLAY_ON_ISSUE_DESCRIPTION = 10
_NUM_DATA_POINTS_TO_RUN_CHANGE_POINT_ANALYSIS = 100
# Variables used for finding duplicate change points.
_DEFAULT_MIN_RUNS_BETWEEN_CHANGE_POINTS = 3
-_DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW = 30
+_DEFAULT_NUM_RUMS_IN_CHANGE_POINT_WINDOW = 14
_PERF_TEST_KEYS = {
'test_name', 'metrics_dataset', 'metrics_table', 'project', 'metric_name'
diff --git a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
index 398a98e00ce..d0944a91318 100644
--- a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
+++ b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
@@ -32,8 +32,7 @@ except KeyError as e:
'A Github Personal Access token is required '
'to create Github Issues.')
-# TODO: Change the REPO owner name to apache before merging.
-_BEAM_GITHUB_REPO_OWNER = 'AnandInguva'
+_BEAM_GITHUB_REPO_OWNER = 'apache'
_BEAM_GITHUB_REPO_NAME = 'beam'
# Adding GitHub Rest API version to the header to maintain version stability.
# For more information, please look at
diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis.py
index 870deed770c..ee00e8abf42 100644
--- a/sdks/python/apache_beam/testing/analyzers/perf_analysis.py
+++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis.py
@@ -82,6 +82,7 @@ def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
change_point_index = find_latest_change_point_index(
metric_values=metric_values)
if not change_point_index:
+ logging.info("Change point is not detected for the test %s" % test_name)
return False
# since timestamps are ordered in ascending order and
# num_runs_in_change_point_window refers to the latest runs,
@@ -92,18 +93,21 @@ def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
latest_change_point_run):
logging.info(
'Performance regression/improvement found for the test: %s. '
- 'Since the change point run %s '
+ 'on metric %s. Since the change point run %s '
'lies outside the num_runs_in_change_point_window distance: %s, '
'alert is not raised.' % (
params['test_name'],
- latest_change_point_run,
+ metric_name,
+ latest_change_point_run + 1,
num_runs_in_change_point_window))
return False
is_alert = True
last_reported_issue_number = None
+ issue_metadata_table_name = f'{params.get("metrics_table")}_{metric_name}'
existing_issue_data = get_existing_issues_data(
- test_name=test_name, big_query_metrics_fetcher=big_query_metrics_fetcher)
+ table_name=issue_metadata_table_name,
+ big_query_metrics_fetcher=big_query_metrics_fetcher)
if existing_issue_data is not None:
existing_issue_timestamps = existing_issue_data[
@@ -116,7 +120,6 @@ def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
change_point_index=change_point_index,
timestamps=timestamps,
min_runs_between_change_points=min_runs_between_change_points)
-
logging.debug(
"Performance alert is %s for test %s" % (is_alert, params['test_name']))
if is_alert:
@@ -124,7 +127,9 @@ def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
metric_name, params['test_name'], timestamps,
metric_values, change_point_index,
params.get('labels', None),
- last_reported_issue_number)
+ last_reported_issue_number,
+ test_target=params['test_target'] if 'test_target' in params else None
+ )
issue_metadata = GitHubIssueMetaData(
issue_timestamp=pd.Timestamp(
@@ -138,7 +143,7 @@ def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
change_point_timestamp=timestamps[change_point_index])
publish_issue_metadata_to_big_query(
- issue_metadata=issue_metadata, test_name=test_name)
+ issue_metadata=issue_metadata, table_name=issue_metadata_table_name)
return is_alert
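
A rough sketch of the alerting decision that perf_analysis.py implements after this change, assuming ascending timestamps as the code comments state; the helper below simplifies the real logic, omits the BigQuery and GitHub calls, and the default window of 14 comes from constants.py. When it returns a table name, the existing-issue lookup and GitHub alert would proceed against that table.

```python
# Simplified sketch, not the actual Beam code: alert only when the change
# point falls inside the window of most recent runs, and key the GitHub issue
# metadata table per (metrics_table, metric_name) pair instead of per test.
def decide_alert(params, metric_name, latest_change_point_run,
                 num_runs_in_change_point_window=14):
  if latest_change_point_run > num_runs_in_change_point_window:
    # Change point is too old relative to the latest runs; no alert is raised.
    return None
  # Table that stores previously reported issues for this test metric.
  return f"{params.get('metrics_table')}_{metric_name}"
```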
diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py
index 247fe07f4df..ec74f206ce8 100644
--- a/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py
+++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis_utils.py
@@ -59,7 +59,7 @@ def is_change_point_in_valid_window(
def get_existing_issues_data(
- test_name: str, big_query_metrics_fetcher: BigQueryMetricsFetcher
+ table_name: str, big_query_metrics_fetcher: BigQueryMetricsFetcher
) -> Optional[pd.DataFrame]:
"""
Finds the most recent GitHub issue created for the test_name.
@@ -67,7 +67,7 @@ def get_existing_issues_data(
else return latest created issue_number along with
"""
query = f"""
- SELECT * FROM {constants._BQ_PROJECT_NAME}.{constants._BQ_DATASET}.{test_name}
+ SELECT * FROM {constants._BQ_PROJECT_NAME}.{constants._BQ_DATASET}.{table_name}
ORDER BY {constants._ISSUE_CREATION_TIMESTAMP_LABEL} DESC
LIMIT 10
"""
@@ -164,19 +164,19 @@ def find_latest_change_point_index(metric_values: List[Union[float, int]]):
return change_points_idx[-1]
-def publish_issue_metadata_to_big_query(issue_metadata, test_name):
+def publish_issue_metadata_to_big_query(issue_metadata, table_name):
"""
Published issue_metadata to BigQuery with table name=test_name.
"""
bq_metrics_publisher = BigQueryMetricsPublisher(
project_name=constants._BQ_PROJECT_NAME,
dataset=constants._BQ_DATASET,
- table=test_name,
+ table=table_name,
bq_schema=constants._SCHEMA)
bq_metrics_publisher.publish([asdict(issue_metadata)])
logging.info(
'GitHub metadata is published to Big Query Dataset %s'
- ', table %s' % (constants._BQ_DATASET, test_name))
+ ', table %s' % (constants._BQ_DATASET, table_name))
def create_performance_alert(
@@ -186,13 +186,15 @@ def create_performance_alert(
metric_values: List[Union[int, float]],
change_point_index: int,
labels: List[str],
- existing_issue_number: Optional[int]) -> Tuple[int, str]:
+ existing_issue_number: Optional[int],
+ test_target: Optional[str] = None) -> Tuple[int, str]:
"""
Creates performance alert on GitHub issues and returns GitHub issue
number and issue URL.
"""
description = github_issues_utils.get_issue_description(
- test_name=test_name,
+ test_name=(
+ test_name if not test_target else test_name + ':' + test_target),
metric_name=metric_name,
timestamps=timestamps,
metric_values=metric_values,
@@ -209,6 +211,6 @@ def create_performance_alert(
existing_issue_number=existing_issue_number)
logging.info(
- 'Performance regression is alerted on issue #%s. Link to '
- 'the issue: %s' % (issue_number, issue_url))
+ 'Performance regression/improvement is alerted on issue #%s. Link '
+ ': %s' % (issue_number, issue_url))
return issue_number, issue_url
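
For context on the detection step that `find_latest_change_point_index` wraps, a sketch using the `signal-processing-algorithms` package installed by the workflow is shown below; the import path and call are assumptions based on that package's e_divisive API and may not match Beam's exact usage.

```python
# Rough sketch, assuming the e_divisive entry point of the
# signal-processing-algorithms package; the import path may vary by version.
from signal_processing_algorithms.energy_statistics.energy_statistics import (
    e_divisive)


def latest_change_point(metric_values):
  change_points_idx = e_divisive(metric_values)
  if not change_points_idx:
    return None
  # Sort defensively so the last element is the most recent run's index.
  return sorted(change_points_idx)[-1]
```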
diff --git a/sdks/python/apache_beam/testing/analyzers/tests_config.yaml b/sdks/python/apache_beam/testing/analyzers/tests_config.yaml
index 9a208ea9e81..02e649c7586 100644
--- a/sdks/python/apache_beam/testing/analyzers/tests_config.yaml
+++ b/sdks/python/apache_beam/testing/analyzers/tests_config.yaml
@@ -16,22 +16,63 @@
#
test_1:
- test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks_22
+ test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152
+ test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
metrics_dataset: beam_run_inference
metrics_table: torch_inference_imagenet_results_resnet152
project: apache-beam-testing
metric_name: mean_load_model_latency_milli_secs
- labels:
- - run-inference
- # Optional parameters.
- min_runs_between_change_points: 3
- num_runs_in_change_point_window: 30
test_2:
- test_name: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+ test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152
+ test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+ metrics_dataset: beam_run_inference
+ metrics_table: torch_inference_imagenet_results_resnet152
+ project: apache-beam-testing
+ metric_name: mean_inference_batch_latency_micro_secs
+
+test_3:
+ test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 101
+ test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+ metrics_dataset: beam_run_inference
+ metrics_table: torch_inference_imagenet_results_resnet101
+ project: apache-beam-testing
+ metric_name: mean_load_model_latency_milli_secs
+
+test_4:
+ test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 101
+ test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
metrics_dataset: beam_run_inference
metrics_table: torch_inference_imagenet_results_resnet101
project: apache-beam-testing
+ metric_name: mean_inference_batch_latency_micro_secs
+
+test_5:
+ test_name: test_cloudml_benchmark_cirteo_no_shuffle_10GB
+ metrics_dataset: beam_cloudml
+ metrics_table: cloudml_benchmark_cirteo_no_shuffle_10GB
+ project: apache-beam-testing
+ metric_name: runtime_sec
+
+test_6:
+ test_name: test_cloudml_benchmark_criteo_10GB
+ metrics_dataset: beam_cloudml
+ metrics_table: cloudml_benchmark_criteo_10GB
+ project: apache-beam-testing
+ metric_name: runtime_sec
+
+test_7:
+ test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152 with Tesla T4 GPU
+ test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+ metrics_dataset: beam_run_inference
+ metrics_table: torch_inference_imagenet_results_resnet152_tesla_t4
+ project: apache-beam-testing
+ metric_name: mean_inference_batch_latency_micro_secs
+
+test_8:
+ test_name: Pytorch image classification on 50k images of size 224 x 224 with resnet 152 with Tesla T4 GPU
+ test_target: apache_beam.testing.benchmarks.inference.pytorch_image_classification_benchmarks
+ metrics_dataset: beam_run_inference
+ metrics_table: torch_inference_imagenet_results_resnet152_tesla_t4
+ project: apache-beam-testing
metric_name: mean_load_model_latency_milli_secs
- labels:
- - run-inference
diff --git a/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py b/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py
index e35f1fd8ec9..3e5a640c7aa 100644
--- a/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py
+++ b/sdks/python/apache_beam/testing/benchmarks/cloudml/cloudml_benchmark_test.py
@@ -122,30 +122,6 @@ class CloudMLTFTBenchmarkTest(unittest.TestCase):
metrics_table=metrics_table,
metric_name='runtime_sec')
- def test_cloud_ml_benchmark_criteo_fixed_workers_10GB(self):
- test_pipeline = TestPipeline(is_integration_test=True)
- extra_opts = {}
- extra_opts['input'] = os.path.join(
- _INPUT_GCS_BUCKET_ROOT, lib.INPUT_CRITEO_10GB)
- extra_opts['benchmark_type'] = 'tft'
- extra_opts['classifier'] = 'criteo'
- extra_opts['frequency_threshold'] = 0
- extra_opts['output'] = os.path.join(
- _OUTPUT_GCS_BUCKET_ROOT, uuid.uuid4().hex)
- extra_opts['num_workers'] = 50
- extra_opts['machine_type'] = 'n1-standard-4'
- start_time = time.time()
- workflow.run(test_pipeline.get_full_options_as_args(**extra_opts))
- end_time = time.time()
-
- metrics_table = 'cloudml_benchmark_criteo_fixed_workers_10GB'
-
- _publish_metrics(
- pipeline=test_pipeline,
- metric_value=end_time - start_time,
- metrics_table=metrics_table,
- metric_name='runtime_sec')
-
if __name__ == '__main__':
unittest.main()