This is an automated email from the ASF dual-hosted git repository.
anandinguva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git
The following commit(s) were added to refs/heads/master by this push:
new 5852a7af51f Append `anomaly` to the right change point index (#27288)
5852a7af51f is described below
commit 5852a7af51fa019ddd3cf0f47356fd55c6b42df6
Author: Anand Inguva <[email protected]>
AuthorDate: Fri Jul 14 13:34:14 2023 -0400
Append `anomaly` to the right change point index (#27288)
* Append `anomaly` to the right change point index
* Update sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
* Add test
* Fix imports
* Update sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
Co-authored-by: tvalentyn <[email protected]>
* Address comments
* Refactor code
* Refactor code; address comments
* Fix test check
* Update perf_analysis_test.py
* Fix formatting
* address comments/remove new_lines_joiner constant
* fix test
---------
Co-authored-by: tvalentyn <[email protected]>
---
.../apache_beam/testing/analyzers/constants.py | 2 ++
.../testing/analyzers/github_issues_utils.py | 42 ++++++++++++----------
.../testing/analyzers/perf_analysis_test.py | 35 ++++++++++++++++--
3 files changed, 58 insertions(+), 21 deletions(-)
diff --git a/sdks/python/apache_beam/testing/analyzers/constants.py b/sdks/python/apache_beam/testing/analyzers/constants.py
index 2a52fab563d..8f8bdf13300 100644
--- a/sdks/python/apache_beam/testing/analyzers/constants.py
+++ b/sdks/python/apache_beam/testing/analyzers/constants.py
@@ -70,3 +70,5 @@ _SCHEMA = [{
}, {
'name': _ISSUE_URL, 'field_type': 'STRING', 'mode': 'REQUIRED'
}]
+
+_ANOMALY_MARKER = ' <---- Anomaly'
diff --git a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
index f6e2939161e..e1f20baa50a 100644
--- a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
+++ b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
@@ -24,6 +24,8 @@ from typing import Tuple
import pandas as pd
import requests
+from apache_beam.testing.analyzers import constants
+
try:
_GITHUB_TOKEN: Optional[str] = os.environ['GITHUB_TOKEN']
except KeyError as e:
@@ -162,28 +164,32 @@ def get_issue_description(
"""
# TODO: Add mean and median before and after the changepoint index.
- max_timestamp_index = min(
- change_point_index + max_results_to_display, len(metric_values) - 1)
- min_timestamp_index = max(0, change_point_index - max_results_to_display)
- description = _ISSUE_DESCRIPTION_TEMPLATE.format(
- test_name, metric_name) + 2 * '\n'
+ description = []
+
+ description.append(_ISSUE_DESCRIPTION_TEMPLATE.format(test_name,
metric_name))
- description += (
- "`Test description:` " + f'{test_description}' +
- 2 * '\n') if test_description else ''
+ description.append(("`Test description:` " +
+ f'{test_description}') if test_description else '')
- description += '```' + '\n'
- runs_to_display = [
- _METRIC_INFO_TEMPLATE.format(
- timestamps[i].ctime(), format(metric_values[i], '.2f'))
- for i in reversed(range(min_timestamp_index, max_timestamp_index + 1))
- ]
+ description.append('```')
+
+ runs_to_display = []
+ max_timestamp_index = min(
+ change_point_index + max_results_to_display, len(metric_values) - 1)
+ min_timestamp_index = max(0, change_point_index - max_results_to_display)
- runs_to_display[change_point_index - min_timestamp_index] += " <---- Anomaly"
- description += '\n'.join(runs_to_display) + '\n'
- description += '```' + '\n'
- return description
+ # run in reverse to display the most recent runs first.
+ for i in reversed(range(min_timestamp_index, max_timestamp_index + 1)):
+ row_template = _METRIC_INFO_TEMPLATE.format(
+ timestamps[i].ctime(), format(metric_values[i], '.2f'))
+ if i == change_point_index:
+ row_template += constants._ANOMALY_MARKER
+ runs_to_display.append(row_template)
+
+ description.append(os.linesep.join(runs_to_display))
+ description.append('```')
+ return (2 * os.linesep).join(description)
def report_change_point_on_issues(
diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
index fabf185a41d..c18b1bb9506 100644
--- a/sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
+++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
@@ -16,8 +16,10 @@
#
# pytype: skip-file
+import datetime
import logging
import os
+import re
import unittest
import mock
@@ -28,10 +30,13 @@ import pandas as pd
try:
import apache_beam.testing.analyzers.perf_analysis as analysis
from apache_beam.testing.analyzers import constants
+ from apache_beam.testing.analyzers import github_issues_utils
from apache_beam.testing.analyzers.perf_analysis_utils import is_change_point_in_valid_window
from apache_beam.testing.analyzers.perf_analysis_utils import is_perf_alert
from apache_beam.testing.analyzers.perf_analysis_utils import e_divisive
+ from apache_beam.testing.analyzers.perf_analysis_utils import find_latest_change_point_index
from apache_beam.testing.analyzers.perf_analysis_utils import validate_config
+
except ImportError as e:
analysis = None # type: ignore
@@ -45,17 +50,18 @@ def get_fake_data_with_no_change_point(**kwargs):
def get_fake_data_with_change_point(**kwargs):
+ # change point will be at index 13.
num_samples = 20
- metric_values = [0] * (num_samples // 2) + [1] * (num_samples // 2)
+ metric_values = [0] * 12 + [3] + [4] * 7
timestamps = [i for i in range(num_samples)]
return metric_values, timestamps
def get_existing_issue_data(**kwargs):
- # change point found at index 10. So passing 10 in the
+ # change point found at index 13. So passing 13 in the
# existing issue data in mock method.
return pd.DataFrame([{
- constants._CHANGE_POINT_TIMESTAMP_LABEL: 10,
+ constants._CHANGE_POINT_TIMESTAMP_LABEL: 13,
constants._ISSUE_NUMBER: np.array([0])
}])
@@ -193,6 +199,29 @@ class TestChangePointAnalysis(unittest.TestCase):
big_query_metrics_fetcher=None)
self.assertFalse(is_alert)
+ def test_change_point_has_anomaly_marker_in_gh_description(self):
+ metric_values, timestamps = get_fake_data_with_change_point()
+ timestamps = [datetime.datetime.fromtimestamp(ts) for ts in timestamps]
+ change_point_index = find_latest_change_point_index(metric_values)
+
+ description = github_issues_utils.get_issue_description(
+ test_name=self.test_id,
+ test_description=self.params['test_description'],
+ metric_name=self.params['metric_name'],
+ metric_values=metric_values,
+ timestamps=timestamps,
+ change_point_index=change_point_index,
+ max_results_to_display=(
+ constants._NUM_RESULTS_TO_DISPLAY_ON_ISSUE_DESCRIPTION))
+
+ runs_info = next((
+ line for line in description.split(2 * os.linesep)
+ if re.match(r'timestamp: .*, metric_value: .*', line.strip())),
+ '')
+ pattern = (r'timestamp: .+ (\d{4}), metric_value: (\d+.\d+) <---- Anomaly')
+ match = re.search(pattern, runs_info)
+ self.assertTrue(match)
+
if __name__ == '__main__':
logging.getLogger().setLevel(logging.DEBUG)