This is an automated email from the ASF dual-hosted git repository.

davidarthur pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/kafka.git
The following commit(s) were added to refs/heads/trunk by this push:
     new 0727f9be637 KAFKA-18416 - added a new report section to catch persistent flaky tests (#18434)
0727f9be637 is described below

commit 0727f9be6370c0ecf289f24069c01f49a4270e9f
Author: santhoshct <114662696+santhos...@users.noreply.github.com>
AuthorDate: Fri Jan 17 00:40:32 2025 +0530

    KAFKA-18416 - added a new report section to catch persistent flaky tests (#18434)

    Reviewers: David Arthur <mum...@gmail.com>
---
 .github/scripts/develocity_reports.py | 152 ++++++++++++++++++++++++++++++++--
 1 file changed, 147 insertions(+), 5 deletions(-)

diff --git a/.github/scripts/develocity_reports.py b/.github/scripts/develocity_reports.py
index d342cf86e1c..74df32a3ede 100644
--- a/.github/scripts/develocity_reports.py
+++ b/.github/scripts/develocity_reports.py
@@ -806,6 +806,80 @@ class TestAnalyzer:
 
         logger.info(f"Updated cache with {len(builds)} builds")
 
+    def get_persistent_failing_tests(self, results: List[TestResult],
+                                     min_failure_rate: float = 0.2,
+                                     min_executions: int = 5) -> Dict[str, Dict]:
+        """
+        Identify tests that have been consistently failing/flaky over time.
+        Groups by test class and includes individual test cases.
+        """
+        persistent_failures = {}
+        current_time = datetime.now(pytz.UTC)
+        chunk_start = current_time - timedelta(days=7)  # Last 7 days for test cases
+
+        # Group results by class
+        class_groups = {}
+        for result in results:
+            class_name = result.name.split('#')[0]  # Get class name
+            if class_name not in class_groups:
+                class_groups[class_name] = []
+            class_groups[class_name].append(result)
+
+        # Analyze each class and its test cases
+        for class_name, class_results in class_groups.items():
+            class_total = sum(r.outcome_distribution.total for r in class_results)
+            class_problems = sum(r.outcome_distribution.failed + r.outcome_distribution.flaky
+                                 for r in class_results)
+
+            if class_total < min_executions:
+                continue
+
+            class_failure_rate = class_problems / class_total if class_total > 0 else 0
+
+            # Only include if class has significant failures
+            if class_failure_rate >= min_failure_rate:
+                try:
+                    # Get detailed test case information using the same method as other reports
+                    test_cases = self.get_test_case_details(
+                        class_name,
+                        "kafka",
+                        chunk_start,
+                        current_time,
+                        test_type="test"
+                    )
+
+                    failing_test_cases = {}
+                    for test_case in test_cases:
+                        total_runs = test_case.outcome_distribution.total
+                        if total_runs >= min_executions:
+                            problem_runs = (test_case.outcome_distribution.failed +
+                                            test_case.outcome_distribution.flaky)
+                            failure_rate = problem_runs / total_runs if total_runs > 0 else 0
+
+                            if failure_rate >= min_failure_rate:
+                                # Extract just the method name
+                                method_name = test_case.name.split('.')[-1]
+                                failing_test_cases[method_name] = {
+                                    'result': test_case,
+                                    'failure_rate': failure_rate,
+                                    'total_executions': total_runs,
+                                    'failed_executions': problem_runs,
+                                    'timeline': sorted(test_case.timeline, key=lambda x: x.timestamp)
+                                }
+
+                    if failing_test_cases:  # Only include classes that have problematic test cases
+                        persistent_failures[class_name] = {
+                            'failure_rate': class_failure_rate,
+                            'total_executions': class_total,
+                            'failed_executions': class_problems,
+                            'test_cases': failing_test_cases
+                        }
+
+                except Exception as e:
+                    logger.error(f"Error getting test case details for {class_name}: {str(e)}")
+
+        return persistent_failures
+
 def get_develocity_class_link(class_name: str, threshold_days: int, test_type: str = None) -> str:
     """
     Generate Develocity link for a test class
@@ -911,14 +985,13 @@ def print_most_problematic_tests(problematic_tests: Dict[str, Dict], threshold_d
                                      key=lambda x: (x.outcome_distribution.failed + x.outcome_distribution.flaky) /
                                                    x.outcome_distribution.total if x.outcome_distribution.total > 0 else 0,
                                      reverse=True):
-                method_name = test_method.name.split('.')[-1]
                 if test_method.timeline:
                     print(f"\n#### {method_name}")
                     print("Recent Executions:")
                     print("```")
                     print("Date/Time (UTC)   Outcome    Build ID")
                     print("-" * 44)
-                    for entry in sorted(test_method.timeline, key=lambda x: x.timestamp)[-5:]:
+                    for entry in sorted(test_method.timeline, key=lambda x: x.timestamp, reverse=True)[:5]:
                         date_str = entry.timestamp.strftime('%Y-%m-%d %H:%M')
                         print(f"{date_str:<17} {entry.outcome:<10} {entry.build_id}")
                     print("```")
@@ -947,7 +1020,7 @@ def print_flaky_regressions(flaky_regressions: Dict[str, Dict], threshold_days:
 
         # Add recent execution details in sub-rows
         print("<tr><td colspan=\"5\">Recent Executions:</td></tr>")
-        for entry in sorted(details['recent_executions'], key=lambda x: x.timestamp)[-5:]:
+        for entry in sorted(details['recent_executions'], key=lambda x: x.timestamp, reverse=True)[:5]:
             date_str = entry.timestamp.strftime('%Y-%m-%d %H:%M')
             print(f"<tr><td></td><td colspan=\"4\">{date_str} - {entry.outcome}</td></tr>")
     print("</table>")
@@ -966,13 +1039,74 @@ def print_flaky_regressions(flaky_regressions: Dict[str, Dict], threshold_days:
             print("```")
             print("Date/Time (UTC)   Outcome    Build ID")
             print("-" * 44)
-            for entry in sorted(details['recent_executions'], key=lambda x: x.timestamp)[-5:]:
+            for entry in sorted(details['recent_executions'], key=lambda x: x.timestamp, reverse=True)[:5]:
                 date_str = entry.timestamp.strftime('%Y-%m-%d %H:%M')
                 print(f"{date_str:<17} {entry.outcome:<10} {entry.build_id}")
             print("```")
     print("</details>")
 
+def print_persistent_failing_tests(persistent_failures: Dict[str, Dict], threshold_days: int):
+    """Print tests that have been consistently failing over time"""
+    print("\n## Persistently Failing/Flaky Tests")
+    if not persistent_failures:
+        print("No persistently failing tests found.")
+        return
+
+    print(f"Found {len(persistent_failures)} tests that have been consistently failing or flaky.")
+
+    # Print table with test details
+    print("\n<table>")
+    print("<tr><td>Test Class</td><td>Test Case</td><td>Failure Rate</td><td>Total Runs</td><td>Failed/Flaky</td><td>Link</td></tr>")
+
+    for class_name, class_details in sorted(persistent_failures.items(),
+                                            key=lambda x: x[1]['failure_rate'],
+                                            reverse=True):
+        class_link = get_develocity_class_link(class_name, threshold_days)
+
+        # Print class row
+        print(f"<tr><td colspan=\"5\">{class_name}</td>"
+              f"<td><a href=\"{class_link}\">↗️</a></td></tr>")
+
+        # Print test case rows
+        for test_name, test_details in sorted(class_details['test_cases'].items(),
+                                              key=lambda x: x[1]['failure_rate'],
+                                              reverse=True):
+            test_link = get_develocity_method_link(class_name, test_name, threshold_days)
+            print(f"<tr><td></td>"
+                  f"<td>{test_name}</td>"
+                  f"<td>{test_details['failure_rate']:.2%}</td>"
+                  f"<td>{test_details['total_executions']}</td>"
+                  f"<td>{test_details['failed_executions']}</td>"
+                  f"<td><a href=\"{test_link}\">↗️</a></td></tr>")
+    print("</table>")
+
+    # Print detailed history
+    print("\n<details>")
+    print("<summary>Detailed Execution History</summary>\n")
+
+    for class_name, class_details in sorted(persistent_failures.items(),
+                                            key=lambda x: x[1]['failure_rate'],
+                                            reverse=True):
+        print(f"\n### {class_name}")
+        print(f"* Overall Failure Rate: {class_details['failure_rate']:.2%}")
+        print(f"* Total Executions: {class_details['total_executions']}")
+        print(f"* Failed/Flaky Executions: {class_details['failed_executions']}")
+
+        for test_name, test_details in sorted(class_details['test_cases'].items(),
+                                              key=lambda x: x[1]['failure_rate'],
+                                              reverse=True):
+            print("\nRecent Executions:")
+            print("```")
+            print("Date/Time (UTC)   Outcome    Build ID")
+            print("-" * 44)
+            for entry in sorted(test_details['timeline'], key=lambda x: x.timestamp, reverse=True)[:5]:
+                date_str = entry.timestamp.strftime('%Y-%m-%d %H:%M')
+                print(f"{date_str:<17} {entry.outcome:<10} {entry.build_id}")
+            print("```")
+
+    print("</details>")
+
 def print_cleared_tests(cleared_tests: Dict[str, Dict], threshold_days: int, test_type: str = None):
     """Print tests that are ready to be unquarantined"""
     print("\n## Cleared Tests (Ready for Unquarantine)")
@@ -1036,7 +1170,7 @@ def print_cleared_tests(cleared_tests: Dict[str, Dict], threshold_days: int, tes
             print("```")
             print("Date/Time (UTC)   Outcome    Build ID")
             print("-" * 44)
-            for entry in test_case['recent_executions']:
+            for entry in sorted(test_case['recent_executions'], key=lambda x: x.timestamp, reverse=True)[:5]:
                 date_str = entry.timestamp.strftime('%Y-%m-%d %H:%M')
                 print(f"{date_str:<17} {entry.outcome:<10} {entry.build_id}")
             print("```")
@@ -1103,6 +1237,13 @@ def main():
             success_threshold=SUCCESS_THRESHOLD
         )
 
+        # Get persistent failing tests (add after getting regular_results)
+        persistent_failures = analyzer.get_persistent_failing_tests(
+            regular_results,
+            min_failure_rate=0.2,  # 20% failure rate threshold
+            min_executions=5
+        )
+
         # Print report header
        print(f"\n# Flaky Test Report for {datetime.now(pytz.UTC).strftime('%Y-%m-%d')}")
        print(f"This report was run on {datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')} UTC")
@@ -1110,6 +1251,7 @@ def main():
         # Print each section
         print_most_problematic_tests(problematic_tests, QUARANTINE_THRESHOLD_DAYS, test_type="quarantinedTest")
         print_flaky_regressions(flaky_regressions, QUARANTINE_THRESHOLD_DAYS)
+        print_persistent_failing_tests(persistent_failures, QUARANTINE_THRESHOLD_DAYS)
         print_cleared_tests(cleared_tests, QUARANTINE_THRESHOLD_DAYS, test_type="quarantinedTest")
 
     except Exception as e:
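
For readers skimming the diff: the new get_persistent_failing_tests method reduces to a
two-threshold predicate that is applied first per test class and then per test case. The
standalone sketch below restates that rule; the Outcomes tuple is a hypothetical stand-in
for the script's outcome_distribution objects, not part of the committed code.

    from typing import NamedTuple

    class Outcomes(NamedTuple):
        # Hypothetical stand-in for the script's outcome_distribution.
        total: int
        failed: int
        flaky: int

    def is_persistent(o: Outcomes,
                      min_failure_rate: float = 0.2,
                      min_executions: int = 5) -> bool:
        # Too few executions: not enough signal to call the failures persistent.
        if o.total < min_executions:
            return False
        # Failed and flaky runs both count against the test.
        return (o.failed + o.flaky) / o.total >= min_failure_rate

    # 2 problem runs out of 8 is 25%, at or above the 20% threshold: reported.
    assert is_persistent(Outcomes(total=8, failed=1, flaky=1))
    # 1 problem run out of 8 is 12.5%, below the threshold: not reported.
    assert not is_persistent(Outcomes(total=8, failed=0, flaky=1))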
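
Separately, the one-line change repeated across the print functions, sorted(...)[-5:]
becoming sorted(..., reverse=True)[:5], selects the same five most recent entries but
emits them newest-first (print_cleared_tests additionally gains the sort and limit it
previously lacked). A minimal sketch of the equivalence, using a hypothetical Entry
record in place of the script's timeline entries:

    from dataclasses import dataclass
    from datetime import datetime, timedelta

    @dataclass
    class Entry:
        # Hypothetical stand-in for a timeline entry in the report script.
        timestamp: datetime
        outcome: str

    now = datetime(2025, 1, 17)
    entries = [Entry(now - timedelta(days=d), "flaky") for d in range(10)]

    # Old form: the five newest entries, listed oldest-first.
    oldest_first = sorted(entries, key=lambda x: x.timestamp)[-5:]
    # New form: the same five entries, listed newest-first.
    newest_first = sorted(entries, key=lambda x: x.timestamp, reverse=True)[:5]

    assert newest_first == oldest_first[::-1]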