This is an automated email from the ASF dual-hosted git repository.

jscheffl pushed a commit to branch v2-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v2-10-test by this push:
     new 9c3d0cc01e Make Scarf usage reporting in major+minor versions and 
counters in buckets (#41900)
9c3d0cc01e is described below

commit 9c3d0cc01ebbc449171c942e70a52cbc8cfb1846
Author: Jens Scheffler <[email protected]>
AuthorDate: Fri Aug 30 20:18:31 2024 +0200

    Make Scarf usage reporting in major+minor versions and counters in buckets 
(#41900)
    
    (cherry picked from commit 5a045196704943ef3195f3eeaacf2adae30e5ec1)
---
 airflow/utils/usage_data_collection.py    | 18 ++++++++++---
 airflow/www/views.py                      |  7 +++--
 tests/utils/test_usage_data_collection.py | 43 ++++++++++++++++++++++++++++---
 tests/www/views/test_views.py             |  6 ++---
 4 files changed, 62 insertions(+), 12 deletions(-)

diff --git a/airflow/utils/usage_data_collection.py 
b/airflow/utils/usage_data_collection.py
index 389b239ada..def8ce983a 100644
--- a/airflow/utils/usage_data_collection.py
+++ b/airflow/utils/usage_data_collection.py
@@ -80,8 +80,8 @@ def get_database_version() -> str:
         return "None"
 
     version_info = settings.engine.dialect.server_version_info
-    # Example: (1, 2, 3) -> "1.2.3"
-    return ".".join(map(str, version_info)) if version_info else "None"
+    # Example: (1, 2, 3) -> "1.2" (cut only major+minor w/o patch)
+    return ".".join(map(str, version_info[0:2])) if version_info else "None"
 
 
 def get_database_name() -> str:
@@ -95,7 +95,8 @@ def get_executor() -> str:
 
 
 def get_python_version() -> str:
-    return platform.python_version()
+    # Cut only major+minor from the python version string (e.g. 3.10.12 --> 
3.10)
+    return ".".join(platform.python_version().split(".")[0:2])
 
 
 def get_plugin_counts() -> dict[str, int]:
@@ -108,3 +109,14 @@ def get_plugin_counts() -> dict[str, int]:
         "appbuilder_menu_items": sum(len(x["appbuilder_menu_items"]) for x in 
plugin_info),
         "timetables": sum(len(x["timetables"]) for x in plugin_info),
     }
+
+
+def to_bucket(counter: int) -> str:
+    """As we don't want to have precise numbers, make number into a bucket."""
+    if counter == 0:
+        return "0"
+    buckets = [0, 5, 10, 20, 50, 100, 200, 500, 1000, 2000]
+    for idx, val in enumerate(buckets[1:]):
+        if buckets[idx] < counter and counter <= val:
+            return f"{buckets[idx] + 1}-{val}"
+    return f"{buckets[-1]}+"
diff --git a/airflow/www/views.py b/airflow/www/views.py
index a485f84ed4..cd470807a9 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -240,6 +240,9 @@ def build_scarf_url(dags_count: int) -> str:
     appbuilder_views_count = plugin_counts["appbuilder_views"]
     appbuilder_menu_items_count = plugin_counts["appbuilder_menu_items"]
     timetables_count = plugin_counts["timetables"]
+    dag_bucket = usage_data_collection.to_bucket(dags_count)
+    plugins_bucket = usage_data_collection.to_bucket(plugins_count)
+    timetable_bucket = usage_data_collection.to_bucket(timetables_count)
 
     # Path Format:
     # 
/{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}/{plugin_count}/{flask_blueprint_count}/{appbuilder_view_count}/{appbuilder_menu_item_count}/{timetables}
@@ -248,8 +251,8 @@ def build_scarf_url(dags_count: int) -> str:
     scarf_url = (
         f"{scarf_domain}/webserver"
         f"/{version}/{python_version}"
-        
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dags_count}"
-        
f"/{plugins_count}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetables_count}"
+        
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dag_bucket}"
+        
f"/{plugins_bucket}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetable_bucket}"
     )
 
     return scarf_url
diff --git a/tests/utils/test_usage_data_collection.py 
b/tests/utils/test_usage_data_collection.py
index 5244de1a58..b104d1bfe3 100644
--- a/tests/utils/test_usage_data_collection.py
+++ b/tests/utils/test_usage_data_collection.py
@@ -24,7 +24,12 @@ import pytest
 
 from airflow import __version__ as airflow_version
 from airflow.configuration import conf
-from airflow.utils.usage_data_collection import get_database_version, 
usage_data_collection
+from airflow.utils.usage_data_collection import (
+    get_database_version,
+    get_python_version,
+    to_bucket,
+    usage_data_collection,
+)
 
 
 @pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True, 
True)])
@@ -51,7 +56,7 @@ def test_scarf_analytics(
 ):
     platform_sys = platform.system()
     platform_machine = platform.machine()
-    python_version = platform.python_version()
+    python_version = get_python_version()
     executor = conf.get("core", "EXECUTOR")
     scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
     usage_data_collection()
@@ -74,12 +79,42 @@ def test_scarf_analytics(
 @pytest.mark.parametrize(
     "version_info, expected_version",
     [
-        ((1, 2, 3), "1.2.3"),  # Normal version tuple
+        ((1, 2, 3), "1.2"),  # Normal version tuple
         (None, "None"),  # No version info available
         ((1,), "1"),  # Single element version tuple
-        ((1, 2, 3, "beta", 4), "1.2.3.beta.4"),  # Complex version tuple with 
strings
+        ((1, 2, 3, "beta", 4), "1.2"),  # Complex version tuple with strings
     ],
 )
 def test_get_database_version(version_info, expected_version):
     with mock.patch("airflow.settings.engine.dialect.server_version_info", 
new=version_info):
         assert get_database_version() == expected_version
+
+
+@pytest.mark.parametrize(
+    "version_info, expected_version",
+    [
+        ("1.2.3", "1.2"),  # Normal version
+        ("4", "4"),  # Single element version
+        ("1.2.3.beta4", "1.2"),  # Complex version tuple with strings
+    ],
+)
+def test_get_python_version(version_info, expected_version):
+    with mock.patch("platform.python_version", return_value=version_info):
+        assert get_python_version() == expected_version
+
+
+@pytest.mark.parametrize(
+    "counter, expected_bucket",
+    [
+        (0, "0"),
+        (1, "1-5"),
+        (5, "1-5"),
+        (6, "6-10"),
+        (11, "11-20"),
+        (20, "11-20"),
+        (21, "21-50"),
+        (10000, "2000+"),
+    ],
+)
+def test_to_bucket(counter, expected_bucket):
+    assert to_bucket(counter) == expected_bucket
diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py
index 76f71c7f38..3cc6a87571 100644
--- a/tests/www/views/test_views.py
+++ b/tests/www/views/test_views.py
@@ -604,7 +604,7 @@ def test_invalid_dates(app, admin_client, url, content):
 @patch("airflow.utils.usage_data_collection.get_database_version", 
return_value="12.3")
 @patch("airflow.utils.usage_data_collection.get_database_name", 
return_value="postgres")
 @patch("airflow.utils.usage_data_collection.get_executor", 
return_value="SequentialExecutor")
-@patch("airflow.utils.usage_data_collection.get_python_version", 
return_value="3.8.5")
+@patch("airflow.utils.usage_data_collection.get_python_version", 
return_value="3.8")
 @patch("airflow.utils.usage_data_collection.get_plugin_counts")
 def test_build_scarf_url(
     get_plugin_counts,
@@ -626,8 +626,8 @@ def test_build_scarf_url(
         result = build_scarf_url(5)
         expected_url = (
             "https://apacheairflow.gateway.scarf.sh/webserver/"
-            
f"{airflow_version}/3.8.5/Linux/x86_64/postgres/12.3/SequentialExecutor/5"
-            f"/10/15/20/25/30"
+            
f"{airflow_version}/3.8/Linux/x86_64/postgres/12.3/SequentialExecutor/1-5"
+            f"/6-10/15/20/25/21-50"
         )
         if enabled:
             assert result == expected_url

Reply via email to