(kafka) branch trunk updated: KAFKA-18223 Add GHA to run report [2/n] (#18170)

davidarthur Fri, 13 Dec 2024 18:00:35 -0800

This is an automated email from the ASF dual-hosted git repository.

davidarthur pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/kafka.git



The following commit(s) were added to refs/heads/trunk by this push:
     new 369b8b56073 KAFKA-18223 Add GHA to run report [2/n] (#18170)
369b8b56073 is described below

commit 369b8b56073f1f25d62aa0253b83f80d92cbefdc
Author: David Arthur <[email protected]>
AuthorDate: Fri Dec 13 20:59:04 2024 -0500

    KAFKA-18223 Add GHA to run report [2/n] (#18170)
    
    Run the flaky test report daily at 6am UTC.
    
    Reviewers: Jun Rao <[email protected]>, Chia-Ping Tsai <[email protected]>
---
 .github/scripts/develocity_reports.py  | 105 ++++++++++++++++-----------------
 .github/workflows/generate-reports.yml |  43 ++++++++++++++
 2 files changed, 95 insertions(+), 53 deletions(-)

diff --git a/.github/scripts/develocity_reports.py 
b/.github/scripts/develocity_reports.py
index 38cf9c47d04..43285a336fd 100644
--- a/.github/scripts/develocity_reports.py
+++ b/.github/scripts/develocity_reports.py
@@ -729,27 +729,31 @@ class TestAnalyzer:
 
 def print_summary(problematic_tests: Dict[str, Dict], flaky_regressions: 
Dict[str, Dict]):
     """Print a summary of the most problematic tests at the top of the 
report"""
-    print("\nSummary of Most Problematic Tests")
-    print("=" * 50)
+    print("\n## Summary of Most Problematic Tests")
 
     # Combine and sort all test cases by failure rate
     all_problem_cases = []
     
     # Process problematic quarantined tests
-    for class_name, details in problematic_tests.items():
+    if len(problematic_tests) > 0:
+        print(f"Found {len(problematic_tests)} tests that have been 
quarantined for a while and are still flaky.")
+    for full_class_name, details in problematic_tests.items():
         for test_case in details['test_cases']:
             total_runs = test_case.outcome_distribution.total
+            method_name = test_case.name.split('.')[-1]
             if total_runs > 0:
                 failure_rate = (test_case.outcome_distribution.failed + 
                               test_case.outcome_distribution.flaky) / 
total_runs
                 all_problem_cases.append({
-                    'class': class_name,
-                    'method': test_case.name.split('.')[-1],
+                    'class': full_class_name,
+                    'method': method_name,
                     'failure_rate': failure_rate,
                     'total_runs': total_runs
                 })
     
     # Process flaky regressions
+    if len(flaky_regressions) > 0:
+        print(f"Found {len(flaky_regressions)} tests that have started 
recently failing.")
     for test_name, details in flaky_regressions.items():
         all_problem_cases.append({
             'class': test_name,
@@ -757,7 +761,7 @@ def print_summary(problematic_tests: Dict[str, Dict], 
flaky_regressions: Dict[st
             'failure_rate': details['recent_flaky_rate'],
             'total_runs': len(details['recent_executions'])
         })
-    
+
     # Sort by failure rate descending
     sorted_cases = sorted(all_problem_cases, 
                          key=lambda x: x['failure_rate'], 
@@ -769,23 +773,32 @@ def print_summary(problematic_tests: Dict[str, Dict], 
flaky_regressions: Dict[st
         if case['class'] not in by_class:
             by_class[case['class']] = []
         by_class[case['class']].append(case)
-    
+
     # Print summary
-    for class_name, cases in by_class.items():
-        print(f"\n{class_name}")
+    print("<table><tr><td>Class</td><td>Test Case</td><td>Failure 
Rate</td><td>Build Scans</td></tr>")
+    for full_class_name, cases in by_class.items():
+        print(f"<tr><td colspan=\"4\">{full_class_name}</td></tr>")
         for case in cases:
             method = case['method']
             if method != 'N/A':
-                print(f"  → {method:<60} {case['failure_rate']:.2%}")
+                
print(f"<tr><td></td><td>{method:<60}</td><td>{case['failure_rate']:.2%}</td><td>{case['total_runs']}</td></tr>")
             else:
-                print(f"  → Class-level flakiness rate: 
{case['failure_rate']:.2%}")
-    
-    print("\n" + "=" * 50)
+                
print(f"<tr><td></td><td></td><td>{case['failure_rate']:.2%}</td><td>{case['total_runs']}</td></tr>")
+    print("</table>")
 
 def main():
+    token = None
+    if os.environ.get("DEVELOCITY_ACCESS_TOKEN"):
+        token = os.environ.get("DEVELOCITY_ACCESS_TOKEN")
+    elif os.environ.get("GE_ACCESS_TOKEN"):
+        # Special case for when we run in GHA
+        token = os.environ.get("GE_ACCESS_TOKEN").removeprefix("ge.apache.org=")
+    else:
+        print("No auth token was specified. You must set 
DEVELOCITY_ACCESS_TOKEN to your personal access token.")
+        exit(1)
+
     # Configuration
     BASE_URL = "https://ge.apache.org"
-    AUTH_TOKEN = os.environ.get("DEVELOCITY_ACCESS_TOKEN")
     PROJECT = "kafka"
     QUARANTINE_THRESHOLD_DAYS = 7
     MIN_FAILURE_RATE = 0.1
@@ -793,7 +806,7 @@ def main():
     SUCCESS_THRESHOLD = 0.7  # For cleared tests
     MIN_FLAKY_RATE = 0.2    # For flaky regressions
 
-    analyzer = TestAnalyzer(BASE_URL, AUTH_TOKEN)
+    analyzer = TestAnalyzer(BASE_URL, token)
     
     try:
         # Get quarantined test results
@@ -832,17 +845,13 @@ def main():
         )
         
         # Print summary first
-        print(f"\nTest Analysis Report 
({datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')} UTC)")
-        print("=" * 100)
+        print(f"\n# Flaky Test Report for 
{datetime.now(pytz.UTC).strftime('%Y-%m-%d')}")
+        print(f"This report was run on 
{datetime.now(pytz.UTC).strftime('%Y-%m-%d %H:%M:%S')} UTC")
+
         print_summary(problematic_tests, flaky_regressions)
-        
-        # Then print detailed reports
-        print("\nDetailed Test Reports")
-        print("=" * 100)
-        
+
         # Print Flaky Test Regressions
-        print("\nFlaky Test Regressions")
-        print("-" * 50)
+        print("\n## Flaky Test Regressions")
         if not flaky_regressions:
             print("No flaky test regressions found.")
         else:
@@ -855,11 +864,16 @@ def main():
                     print(f"  {entry.timestamp.strftime('%Y-%m-%d %H:%M')} - 
{entry.outcome}")
         
         # Print Cleared Tests
-        print("\nCleared Tests (Ready for Unquarantine)")
-        print("-" * 50)
+        print("\n## Cleared Tests (Ready for Unquarantine)")
         if not cleared_tests:
             print("No tests ready to be cleared from quarantine.")
         else:
+            # Print summary
+            print("<table><tr><td>Class</td><td>Test Case</td><td>Success 
Rate</td><td>Build Scans</td></tr>")
+            for test_name, details in cleared_tests.items():
+                
print(f"<tr><td>{test_name}</td><td></td><td>{details['success_rate']:.2%}</td><td>{details['total_executions']}</td></tr>")
+            print("</table>")
+
             for test_name, details in cleared_tests.items():
                 print(f"\n{test_name}")
                 print(f"Success Rate: {details['success_rate']:.2%}")
@@ -869,11 +883,11 @@ def main():
                     print(f"  {entry.timestamp.strftime('%Y-%m-%d %H:%M')} - 
{entry.outcome}")
         
         # Print Defective Tests
-        print("\nHigh-Priority Quarantined Tests")
-        print("-" * 50)
+        print("\n## High-Priority Quarantined Tests")
         if not problematic_tests:
             print("No high-priority quarantined tests found.")
         else:
+            print("These are tests which have been quarantined for several 
days and need attention.")
             sorted_tests = sorted(
                 problematic_tests.items(), 
                 key=lambda x: (x[1]['failure_rate'], x[1]['days_quarantined']),
@@ -881,32 +895,21 @@ def main():
             )
             
             print(f"\nFound {len(sorted_tests)} high-priority quarantined test 
classes:")
-            for class_name, details in sorted_tests:
+            for full_class_name, details in sorted_tests:
                 class_result = details['container_result']
-                
-                print(f"\n{class_name}")
-                print("=" * len(class_name))
-                print(f"Quarantined for {details['days_quarantined']} days")
-                print(f"Class Failure Rate: {details['failure_rate']:.2%}")
-                print(f"Recent Failure Rate: 
{details['recent_failure_rate']:.2%}")
-                print("\nClass Statistics:")
+                class_name = full_class_name.split(".")[-1]
+                print(f"### {class_name}")
+                print(f"{full_class_name} has been quarantined for 
{details['days_quarantined']} days")
+                print(f"Overall class failure: {details['failure_rate']:.2%}")
+                print(f"Recent class failure: 
{details['recent_failure_rate']:.2%}")
+                print("\nOverall Build Outcomes:")
                 print(f"  Total Runs: 
{class_result.outcome_distribution.total}")
                 print(f"  Failed: {class_result.outcome_distribution.failed}")
                 print(f"  Flaky: {class_result.outcome_distribution.flaky}")
                 print(f"  Passed: {class_result.outcome_distribution.passed}")
-                
-                # Show class timeline
-                if class_result.timeline:
-                    print(f"\nClass Recent Executions (last {min(5, 
len(class_result.timeline))} of {len(class_result.timeline)} runs):")
-                    print("  Date/Time (UTC)      Outcome    Build ID")
-                    print("  " + "-" * 48)
-                    for entry in sorted(class_result.timeline, key=lambda x: 
x.timestamp)[-5:]:
-                        date_str = entry.timestamp.strftime('%Y-%m-%d %H:%M')
-                        print(f"  {date_str:<17} {entry.outcome:<10} 
{entry.build_id}")
-                
-                print("\nTest Methods (Last 7 Days):")
-                print("  " + "-" * 48)
-                
+
+                print("\nQuarantined Methods (Last 7 Days):")
+
                 # Sort test methods by failure rate
                 sorted_methods = sorted(
                     details['test_cases'],
@@ -937,10 +940,6 @@ def main():
                                 date_str = entry.timestamp.strftime('%Y-%m-%d 
%H:%M')
                                 print(f"    {date_str:<17} {entry.outcome:<10} 
{entry.build_id}")
                 
-                print("\n" + "-" * 50)
-        
-        print("\n" + "=" * 100)
-                
     except Exception as e:
         logger.exception("Error occurred during report generation")
         print(f"Error occurred: {str(e)}")
diff --git a/.github/workflows/generate-reports.yml 
b/.github/workflows/generate-reports.yml
new file mode 100644
index 00000000000..2f11c85a338
--- /dev/null
+++ b/.github/workflows/generate-reports.yml
@@ -0,0 +1,43 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Generate Reports
+on:
+  workflow_dispatch:      # Let us run manually
+
+  schedule:
+    - cron: '0 6 * * *'   # Run daily at 6am UTC
+
+
+jobs:
+  flaky-test-report:
+    name: Flaky Test Report
+    permissions:
+      contents: read
+    runs-on: ubuntu-latest
+    steps:
+      - name: Env
+        run: printenv
+        env:
+          GITHUB_CONTEXT: ${{ toJson(github) }}
+      - name: Checkout code
+        uses: actions/checkout@v4
+      - name: Setup Python
+        uses: ./.github/actions/setup-python
+      - name: Run Report
+        env:
+          GE_ACCESS_TOKEN: ${{ secrets.GE_ACCESS_TOKEN }}
+        run: |
+          python ./.github/scripts/develocity_reports.py >> $GITHUB_STEP_SUMMARY
\ No newline at end of file

(kafka) branch trunk updated: KAFKA-18223 Add GHA to run report [2/n] (#18170)

Reply via email to