This is an automated email from the ASF dual-hosted git repository.

anandinguva pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/beam.git


The following commit(s) were added to refs/heads/master by this push:
     new 5852a7af51f Append `anomaly` to the right change point index (#27288)
5852a7af51f is described below

commit 5852a7af51fa019ddd3cf0f47356fd55c6b42df6
Author: Anand Inguva <[email protected]>
AuthorDate: Fri Jul 14 13:34:14 2023 -0400

    Append `anomaly` to the right change point index (#27288)
    
    * Append `anomaly` to the right change point index
    
    * Update sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
    
    * Add test
    
    * Fix imports
    
    * Update sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
    
    Co-authored-by: tvalentyn <[email protected]>
    
    * Address comments
    
    * Refactor code
    
    * Refactor code; address comments
    
    * Fix test check
    
    * Update perf_analysis_test.py
    
    * Fix formatting
    
    * address comments/remove new_lines_joiner constant
    
    * fix test
    
    ---------
    
    Co-authored-by: tvalentyn <[email protected]>
---
 .../apache_beam/testing/analyzers/constants.py     |  2 ++
 .../testing/analyzers/github_issues_utils.py       | 42 ++++++++++++----------
 .../testing/analyzers/perf_analysis_test.py        | 35 ++++++++++++++++--
 3 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/sdks/python/apache_beam/testing/analyzers/constants.py b/sdks/python/apache_beam/testing/analyzers/constants.py
index 2a52fab563d..8f8bdf13300 100644
--- a/sdks/python/apache_beam/testing/analyzers/constants.py
+++ b/sdks/python/apache_beam/testing/analyzers/constants.py
@@ -70,3 +70,5 @@ _SCHEMA = [{
            }, {
                'name': _ISSUE_URL, 'field_type': 'STRING', 'mode': 'REQUIRED'
            }]
+
+_ANOMALY_MARKER = ' <---- Anomaly'
diff --git a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
index f6e2939161e..e1f20baa50a 100644
--- a/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
+++ b/sdks/python/apache_beam/testing/analyzers/github_issues_utils.py
@@ -24,6 +24,8 @@ from typing import Tuple
 import pandas as pd
 import requests
 
+from apache_beam.testing.analyzers import constants
+
 try:
   _GITHUB_TOKEN: Optional[str] = os.environ['GITHUB_TOKEN']
 except KeyError as e:
@@ -162,28 +164,32 @@ def get_issue_description(
   """
 
   # TODO: Add mean and median before and after the changepoint index.
-  max_timestamp_index = min(
-      change_point_index + max_results_to_display, len(metric_values) - 1)
-  min_timestamp_index = max(0, change_point_index - max_results_to_display)
 
-  description = _ISSUE_DESCRIPTION_TEMPLATE.format(
-      test_name, metric_name) + 2 * '\n'
+  description = []
+
+  description.append(_ISSUE_DESCRIPTION_TEMPLATE.format(test_name, metric_name))
 
-  description += (
-      "`Test description:` " + f'{test_description}' +
-      2 * '\n') if test_description else ''
+  description.append(("`Test description:` " +
+                      f'{test_description}') if test_description else '')
 
-  description += '```' + '\n'
-  runs_to_display = [
-      _METRIC_INFO_TEMPLATE.format(
-          timestamps[i].ctime(), format(metric_values[i], '.2f'))
-      for i in reversed(range(min_timestamp_index, max_timestamp_index + 1))
-  ]
+  description.append('```')
+
+  runs_to_display = []
+  max_timestamp_index = min(
+      change_point_index + max_results_to_display, len(metric_values) - 1)
+  min_timestamp_index = max(0, change_point_index - max_results_to_display)
 
-  runs_to_display[change_point_index - min_timestamp_index] += " <---- Anomaly"
-  description += '\n'.join(runs_to_display) + '\n'
-  description += '```' + '\n'
-  return description
+  # run in reverse to display the most recent runs first.
+  for i in reversed(range(min_timestamp_index, max_timestamp_index + 1)):
+    row_template = _METRIC_INFO_TEMPLATE.format(
+        timestamps[i].ctime(), format(metric_values[i], '.2f'))
+    if i == change_point_index:
+      row_template += constants._ANOMALY_MARKER
+    runs_to_display.append(row_template)
+
+  description.append(os.linesep.join(runs_to_display))
+  description.append('```')
+  return (2 * os.linesep).join(description)
 
 
 def report_change_point_on_issues(
diff --git a/sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py b/sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
index fabf185a41d..c18b1bb9506 100644
--- a/sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
+++ b/sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py
@@ -16,8 +16,10 @@
 #
 # pytype: skip-file
 
+import datetime
 import logging
 import os
+import re
 import unittest
 
 import mock
@@ -28,10 +30,13 @@ import pandas as pd
 try:
   import apache_beam.testing.analyzers.perf_analysis as analysis
   from apache_beam.testing.analyzers import constants
+  from apache_beam.testing.analyzers import github_issues_utils
   from apache_beam.testing.analyzers.perf_analysis_utils import is_change_point_in_valid_window
   from apache_beam.testing.analyzers.perf_analysis_utils import is_perf_alert
   from apache_beam.testing.analyzers.perf_analysis_utils import e_divisive
+  from apache_beam.testing.analyzers.perf_analysis_utils import find_latest_change_point_index
   from apache_beam.testing.analyzers.perf_analysis_utils import validate_config
+
 except ImportError as e:
   analysis = None  # type: ignore
 
@@ -45,17 +50,18 @@ def get_fake_data_with_no_change_point(**kwargs):
 
 
 def get_fake_data_with_change_point(**kwargs):
+  # change point will be at index 13.
   num_samples = 20
-  metric_values = [0] * (num_samples // 2) + [1] * (num_samples // 2)
+  metric_values = [0] * 12 + [3] + [4] * 7
   timestamps = [i for i in range(num_samples)]
   return metric_values, timestamps
 
 
 def get_existing_issue_data(**kwargs):
-  # change point found at index 10. So passing 10 in the
+  # change point found at index 13. So passing 13 in the
   # existing issue data in mock method.
   return pd.DataFrame([{
-      constants._CHANGE_POINT_TIMESTAMP_LABEL: 10,
+      constants._CHANGE_POINT_TIMESTAMP_LABEL: 13,
       constants._ISSUE_NUMBER: np.array([0])
   }])
 
@@ -193,6 +199,29 @@ class TestChangePointAnalysis(unittest.TestCase):
         big_query_metrics_fetcher=None)
     self.assertFalse(is_alert)
 
+  def test_change_point_has_anomaly_marker_in_gh_description(self):
+    metric_values, timestamps = get_fake_data_with_change_point()
+    timestamps = [datetime.datetime.fromtimestamp(ts) for ts in timestamps]
+    change_point_index = find_latest_change_point_index(metric_values)
+
+    description = github_issues_utils.get_issue_description(
+        test_name=self.test_id,
+        test_description=self.params['test_description'],
+        metric_name=self.params['metric_name'],
+        metric_values=metric_values,
+        timestamps=timestamps,
+        change_point_index=change_point_index,
+        max_results_to_display=(
+            constants._NUM_RESULTS_TO_DISPLAY_ON_ISSUE_DESCRIPTION))
+
+    runs_info = next((
+        line for line in description.split(2 * os.linesep)
+        if re.match(r'timestamp: .*, metric_value: .*', line.strip())),
+                     '')
+    pattern = (r'timestamp: .+ (\d{4}), metric_value: (\d+.\d+) <---- Anomaly')
+    match = re.search(pattern, runs_info)
+    self.assertTrue(match)
+
 
 if __name__ == '__main__':
   logging.getLogger().setLevel(logging.DEBUG)

Reply via email to