tvalentyn commented on code in PR #23931:
URL: https://github.com/apache/beam/pull/23931#discussion_r1055374663
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis.py:
##########
@@ -73,7 +73,7 @@ def run_change_point_analysis(params, test_id,
big_query_metrics_fetcher):
change_point_index = find_latest_change_point_index(
metric_values=metric_values)
if not change_point_index:
- return
+ return False
Review Comment:
Let's add a docstring on line 49.
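Something along these lines could work as a starting point (a rough sketch, assuming line 49 is the `run_change_point_analysis` definition shown in this hunk's context; the argument descriptions are guesses and should be adjusted to the actual behavior):
```python
def run_change_point_analysis(params, test_id, big_query_metrics_fetcher):
  """Runs change point analysis on metrics of a single performance test.

  Args:
    params: Configuration for the test, parsed from the analyzers config.
    test_id: Identifier of the test whose metrics are analyzed.
    big_query_metrics_fetcher: Fetcher used to read metric data from BigQuery.

  Returns:
    True if a valid change point was found and an alert was filed,
    False otherwise.
  """
```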
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py:
##########
@@ -30,7 +33,30 @@
from apache_beam.testing.analyzers.perf_analysis_utils import e_divisive
from apache_beam.testing.analyzers.perf_analysis_utils import validate_config
except ImportError as e:
- analysis = None
+ analysis = None # type: ignore
+
+
+# mock methods.
+def get_fake_data_with_no_change_point(**kwargs):
+ num_samples = 20
+ metric_values = [1 for i in range(num_samples)]
+ timestamps = [i for i in range(num_samples)]
Review Comment:
```suggestion
timestamps = list(range(num_samples))
```
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py:
##########
@@ -51,23 +86,20 @@ def test_edivisive_means(self):
self.assertEqual(sorted(change_point_indexes), [10, 20])
def test_is_changepoint_in_valid_window(self):
-
changepoint_to_recent_run_window = 19
change_point_index = 14
is_valid = is_change_point_in_valid_window(
changepoint_to_recent_run_window, change_point_index)
self.assertEqual(is_valid, True)
- changepoint_to_recent_run_window = 13
- is_valid = is_change_point_in_valid_window(
- changepoint_to_recent_run_window, change_point_index)
- self.assertEqual(is_valid, False)
+ def test_is_change_point_in_invalid_window(self):
Review Comment:
```suggestion
def test_change_point_outside_inspection_window_is_not_a_valid_alert(self):
```
##########
sdks/python/apache_beam/testing/analyzers/github_issues_utils.py:
##########
@@ -85,7 +85,7 @@ def create_issue(
'labels': [_AWAITING_TRIAGE_LABEL, _PERF_ALERT_LABEL]
}
if labels:
- data['labels'].extend(labels)
+ data['labels'].extend(labels) # type: ignore
Review Comment:
For my education, why was this necessary?
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py:
##########
@@ -43,6 +69,15 @@ def setUp(self) -> None:
self.multiple_change_point_series = self.single_change_point_series + [
2
] * 20
+ self.timestamps = [time.time() + 5 for i in range(5)]
Review Comment:
`[time.time() + i for i in range(5)]`?
Also, I'd use some fixed timestamp instead of calling time() on each
iteration, or even 0.
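For example (a sketch with an arbitrary fixed base value; any increasing sequence should work here):
```python
# Arbitrary fixed base timestamp instead of calling time.time() in setUp.
base_timestamp = 0
self.timestamps = [base_timestamp + i for i in range(5)]
```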
##########
sdks/python/apache_beam/testing/analyzers/README.md:
##########
@@ -49,8 +49,8 @@ test_1:
metric_name: mean_load_model_latency_milli_secs
labels:
- run-inference
- min_runs_between_change_points: 5
- num_runs_in_change_point_window: 7
+ min_runs_between_change_points: 5 # optional parameter
+ num_runs_in_change_point_window: 7 # optional parameter
Review Comment:
Is 7 a good example value for this param? It sounds a bit low.
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py:
##########
@@ -30,7 +33,30 @@
from apache_beam.testing.analyzers.perf_analysis_utils import e_divisive
from apache_beam.testing.analyzers.perf_analysis_utils import validate_config
except ImportError as e:
- analysis = None
+ analysis = None # type: ignore
+
+
+# mock methods.
+def get_fake_data_with_no_change_point(**kwargs):
+ num_samples = 20
+ metric_values = [1 for i in range(num_samples)]
Review Comment:
```suggestion
metric_values = [1] * num_samples
```
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py:
##########
@@ -80,34 +112,91 @@ def test_validate_config(self):
self.assertEqual(test_keys, constants._PERF_TEST_KEYS)
self.assertTrue(validate_config(test_keys))
- def test_is_perf_alert(self):
- timestamp_1 = time.time()
- timestamps = [timestamp_1 + i for i in range(4, -1, -1)]
-
+ def test_duplicate_change_point(self):
change_point_index = 2
min_runs_between_change_points = 1
-
is_alert = is_perf_alert(
- previous_change_point_timestamps=[timestamps[3]],
- timestamps=timestamps,
+ previous_change_point_timestamps=[self.timestamps[0]],
+ timestamps=self.timestamps,
change_point_index=change_point_index,
min_runs_between_change_points=min_runs_between_change_points)
- self.assertFalse(is_alert)
+ self.assertTrue(is_alert)
+ def test_not_duplicate_change_point(self):
Review Comment:
```suggestion
def test_duplicate_change_points_are_not_valid_alerts(self):
```
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py:
##########
@@ -80,34 +112,91 @@ def test_validate_config(self):
self.assertEqual(test_keys, constants._PERF_TEST_KEYS)
self.assertTrue(validate_config(test_keys))
- def test_is_perf_alert(self):
- timestamp_1 = time.time()
- timestamps = [timestamp_1 + i for i in range(4, -1, -1)]
-
+ def test_duplicate_change_point(self):
change_point_index = 2
min_runs_between_change_points = 1
-
is_alert = is_perf_alert(
- previous_change_point_timestamps=[timestamps[3]],
- timestamps=timestamps,
+ previous_change_point_timestamps=[self.timestamps[0]],
+ timestamps=self.timestamps,
change_point_index=change_point_index,
min_runs_between_change_points=min_runs_between_change_points)
- self.assertFalse(is_alert)
+ self.assertTrue(is_alert)
+ def test_not_duplicate_change_point(self):
Review Comment:
It's best to name tests so that the names describe the scenario under test and
the expected result.
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py:
##########
@@ -80,34 +112,91 @@ def test_validate_config(self):
self.assertEqual(test_keys, constants._PERF_TEST_KEYS)
self.assertTrue(validate_config(test_keys))
- def test_is_perf_alert(self):
- timestamp_1 = time.time()
- timestamps = [timestamp_1 + i for i in range(4, -1, -1)]
-
+ def test_duplicate_change_point(self):
change_point_index = 2
min_runs_between_change_points = 1
-
is_alert = is_perf_alert(
- previous_change_point_timestamps=[timestamps[3]],
- timestamps=timestamps,
+ previous_change_point_timestamps=[self.timestamps[0]],
+ timestamps=self.timestamps,
change_point_index=change_point_index,
min_runs_between_change_points=min_runs_between_change_points)
- self.assertFalse(is_alert)
+ self.assertTrue(is_alert)
+ def test_not_duplicate_change_point(self):
+ change_point_index = 2
+ min_runs_between_change_points = 1
is_alert = is_perf_alert(
- previous_change_point_timestamps=[timestamps[0]],
- timestamps=timestamps,
+ previous_change_point_timestamps=[self.timestamps[3]],
+ timestamps=self.timestamps,
change_point_index=change_point_index,
min_runs_between_change_points=min_runs_between_change_points)
- self.assertTrue(is_alert)
+ self.assertFalse(is_alert)
is_alert = is_perf_alert(
- previous_change_point_timestamps=[timestamps[0], timestamps[3]],
- timestamps=timestamps,
+ previous_change_point_timestamps=[
+ self.timestamps[0], self.timestamps[3]
+ ],
+ timestamps=self.timestamps,
change_point_index=change_point_index,
min_runs_between_change_points=min_runs_between_change_points)
self.assertFalse(is_alert)
+ @mock.patch(
+ 'apache_beam.testing.analyzers.perf_analysis.fetch_metric_data',
+ get_fake_data_with_no_change_point)
+ def test_alert_on_data_with_no_change_point(self):
+ is_alert = analysis.run_change_point_analysis(
+ params=self.params,
+ test_id=self.test_id,
+ big_query_metrics_fetcher=None)
+ self.assertFalse(is_alert)
+
+ @mock.patch(
+ 'apache_beam.testing.analyzers.perf_analysis.fetch_metric_data',
+ get_fake_data_with_change_point)
+ @mock.patch(
+ 'apache_beam.testing.analyzers.perf_analysis.get_existing_issues_data',
+ lambda *args,
Review Comment:
use `return_value` here and below, e.g.:
`@mock.patch('...', return_value=None)`
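For example, something like this (sketch only; the test name and the `None` return value are assumptions based on the lambda being replaced here):
```python
@mock.patch(
    'apache_beam.testing.analyzers.perf_analysis.fetch_metric_data',
    get_fake_data_with_change_point)
@mock.patch(
    'apache_beam.testing.analyzers.perf_analysis.get_existing_issues_data',
    return_value=None)
def test_alert_on_data_with_change_point(self, mock_get_existing_issues_data):
  # Only the patch created with return_value= is passed in as an argument;
  # the fetch_metric_data patch supplies an explicit replacement function.
  ...
```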
##########
sdks/python/apache_beam/testing/analyzers/perf_analysis_test.py:
##########
@@ -80,34 +112,91 @@ def test_validate_config(self):
self.assertEqual(test_keys, constants._PERF_TEST_KEYS)
self.assertTrue(validate_config(test_keys))
- def test_is_perf_alert(self):
- timestamp_1 = time.time()
- timestamps = [timestamp_1 + i for i in range(4, -1, -1)]
-
+ def test_duplicate_change_point(self):
change_point_index = 2
min_runs_between_change_points = 1
-
is_alert = is_perf_alert(
- previous_change_point_timestamps=[timestamps[3]],
- timestamps=timestamps,
+ previous_change_point_timestamps=[self.timestamps[0]],
+ timestamps=self.timestamps,
change_point_index=change_point_index,
min_runs_between_change_points=min_runs_between_change_points)
- self.assertFalse(is_alert)
+ self.assertTrue(is_alert)
+ def test_not_duplicate_change_point(self):
+ change_point_index = 2
+ min_runs_between_change_points = 1
is_alert = is_perf_alert(
- previous_change_point_timestamps=[timestamps[0]],
- timestamps=timestamps,
+ previous_change_point_timestamps=[self.timestamps[3]],
+ timestamps=self.timestamps,
change_point_index=change_point_index,
min_runs_between_change_points=min_runs_between_change_points)
- self.assertTrue(is_alert)
+ self.assertFalse(is_alert)
is_alert = is_perf_alert(
- previous_change_point_timestamps=[timestamps[0], timestamps[3]],
- timestamps=timestamps,
+ previous_change_point_timestamps=[
+ self.timestamps[0], self.timestamps[3]
+ ],
+ timestamps=self.timestamps,
change_point_index=change_point_index,
min_runs_between_change_points=min_runs_between_change_points)
self.assertFalse(is_alert)
+ @mock.patch(
+ 'apache_beam.testing.analyzers.perf_analysis.fetch_metric_data',
+ get_fake_data_with_no_change_point)
+ def test_alert_on_data_with_no_change_point(self):
Review Comment:
```suggestion
def test_no_alerts_when_no_change_points(self):
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]