This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch v2-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v2-10-test by this push:
     new 2860f81d7e Remove Scarf analytics from Airflow Webserver (#43346) 
(#43348)
2860f81d7e is described below

commit 2860f81d7e0e822531ebc90f6c9b4882f8ae82de
Author: Kaxil Naik <[email protected]>
AuthorDate: Thu Oct 24 15:04:14 2024 +0100

    Remove Scarf analytics from Airflow Webserver (#43346) (#43348)
    
    Since we have already made decisions about what we want to do for Plugins
    in Airflow 3 and revamping the entire UI, this data isn't that important.
    There were also concerns raised about de-duplication and other things.
    
    (cherry picked from commit dc25301f35e78f5058bca9eb8436ec31dc3a2962)
---
 airflow/utils/usage_data_collection.py    | 24 ----------------
 airflow/www/templates/airflow/dags.html   |  3 --
 airflow/www/views.py                      | 47 +------------------------------
 docs/apache-airflow/faq.rst               |  3 --
 tests/utils/test_usage_data_collection.py | 18 ------------
 tests/www/views/test_views.py             | 38 -------------------------
 tests/www/views/test_views_home.py        | 14 ---------
 7 files changed, 1 insertion(+), 146 deletions(-)

diff --git a/airflow/utils/usage_data_collection.py 
b/airflow/utils/usage_data_collection.py
index def8ce983a..fe86a2da1c 100644
--- a/airflow/utils/usage_data_collection.py
+++ b/airflow/utils/usage_data_collection.py
@@ -33,7 +33,6 @@ from packaging.version import parse
 
 from airflow import __version__ as airflow_version, settings
 from airflow.configuration import conf
-from airflow.plugins_manager import get_plugin_info
 
 
 def usage_data_collection():
@@ -97,26 +96,3 @@ def get_executor() -> str:
 def get_python_version() -> str:
     # Cut only major+minor from the python version string (e.g. 3.10.12 --> 
3.10)
     return ".".join(platform.python_version().split(".")[0:2])
-
-
-def get_plugin_counts() -> dict[str, int]:
-    plugin_info = get_plugin_info()
-
-    return {
-        "plugins": len(plugin_info),
-        "flask_blueprints": sum(len(x["flask_blueprints"]) for x in 
plugin_info),
-        "appbuilder_views": sum(len(x["appbuilder_views"]) for x in 
plugin_info),
-        "appbuilder_menu_items": sum(len(x["appbuilder_menu_items"]) for x in 
plugin_info),
-        "timetables": sum(len(x["timetables"]) for x in plugin_info),
-    }
-
-
-def to_bucket(counter: int) -> str:
-    """As we don't want to have preceise numbers, make number into a bucket."""
-    if counter == 0:
-        return "0"
-    buckets = [0, 5, 10, 20, 50, 100, 200, 500, 1000, 2000]
-    for idx, val in enumerate(buckets[1:]):
-        if buckets[idx] < counter and counter <= val:
-            return f"{buckets[idx] + 1}-{val}"
-    return f"{buckets[-1]}+"
diff --git a/airflow/www/templates/airflow/dags.html 
b/airflow/www/templates/airflow/dags.html
index d02b70a396..08a0d58a14 100644
--- a/airflow/www/templates/airflow/dags.html
+++ b/airflow/www/templates/airflow/dags.html
@@ -488,7 +488,4 @@
       return false;
     }
   </script>
-  {% if scarf_url %}
-  <img referrerpolicy="no-referrer" src="{{ scarf_url }}" width="0" height="0" 
alt="" style="display:none;" />
-  {% endif %}
 {% endblock %}
diff --git a/airflow/www/views.py b/airflow/www/views.py
index af60d0aa58..5e8ef6bb7f 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -118,7 +118,7 @@ from airflow.ti_deps.dependencies_deps import 
SCHEDULER_QUEUED_DEPS
 from airflow.timetables._cron import CronMixin
 from airflow.timetables.base import DataInterval, TimeRestriction
 from airflow.timetables.simple import ContinuousTimetable
-from airflow.utils import json as utils_json, timezone, usage_data_collection, 
yaml
+from airflow.utils import json as utils_json, timezone, yaml
 from airflow.utils.airflow_flask_app import get_airflow_app
 from airflow.utils.dag_edges import dag_edges
 from airflow.utils.db import get_query_count
@@ -219,45 +219,6 @@ def get_safe_url(url):
     return redirect_url.geturl()
 
 
-def build_scarf_url(dags_count: int) -> str:
-    """
-    Build the URL for the Scarf usage data collection.
-
-    :meta private:
-    """
-    if not settings.is_usage_data_collection_enabled():
-        return ""
-
-    scarf_domain = "https://apacheairflow.gateway.scarf.sh"
-    platform_sys, platform_arch = usage_data_collection.get_platform_info()
-    db_version = usage_data_collection.get_database_version()
-    db_name = usage_data_collection.get_database_name()
-    executor = usage_data_collection.get_executor()
-    python_version = usage_data_collection.get_python_version()
-    plugin_counts = usage_data_collection.get_plugin_counts()
-    plugins_count = plugin_counts["plugins"]
-    flask_blueprints_count = plugin_counts["flask_blueprints"]
-    appbuilder_views_count = plugin_counts["appbuilder_views"]
-    appbuilder_menu_items_count = plugin_counts["appbuilder_menu_items"]
-    timetables_count = plugin_counts["timetables"]
-    dag_bucket = usage_data_collection.to_bucket(dags_count)
-    plugins_bucket = usage_data_collection.to_bucket(plugins_count)
-    timetable_bucket = usage_data_collection.to_bucket(timetables_count)
-
-    # Path Format:
-    # 
/{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}/{plugin_count}/{flask_blueprint_count}/{appbuilder_view_count}/{appbuilder_menu_item_count}/{timetables}
-    #
-    # This path redirects to a Pixel tracking URL
-    scarf_url = (
-        f"{scarf_domain}/webserver"
-        f"/{version}/{python_version}"
-        
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dag_bucket}"
-        
f"/{plugins_bucket}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetable_bucket}"
-    )
-
-    return scarf_url
-
-
 def get_date_time_num_runs_dag_runs_form_data(www_request, session, dag):
     """Get Execution Data, Base Date & Number of runs from a Request."""
     date_time = www_request.args.get("execution_date")
@@ -1129,11 +1090,6 @@ class Airflow(AirflowBaseView):
                     "warning",
                 )
 
-        try:
-            scarf_url = build_scarf_url(dags_count=all_dags_count)
-        except Exception:
-            scarf_url = ""
-
         return self.render_template(
             "airflow/dags.html",
             dags=dags,
@@ -1173,7 +1129,6 @@ class Airflow(AirflowBaseView):
             sorting_direction=arg_sorting_direction,
             auto_refresh_interval=conf.getint("webserver", 
"auto_refresh_interval"),
             dataset_triggered_next_run_info=dataset_triggered_next_run_info,
-            scarf_url=scarf_url,
             file_tokens=file_tokens,
         )
 
diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst
index 791cfc39a0..6021ba514a 100644
--- a/docs/apache-airflow/faq.rst
+++ b/docs/apache-airflow/faq.rst
@@ -545,6 +545,3 @@ The telemetry data collected is limited to the following:
 - Operating system & machine architecture
 - Executor
 - Metadata DB type & its version
-- Number of DAGs
-- Number of Airflow plugins
-- Number of timetables, Flask blueprints, Flask AppBuilder views, and Flask 
Appbuilder menu items from Airflow plugins
diff --git a/tests/utils/test_usage_data_collection.py 
b/tests/utils/test_usage_data_collection.py
index b104d1bfe3..bc97367208 100644
--- a/tests/utils/test_usage_data_collection.py
+++ b/tests/utils/test_usage_data_collection.py
@@ -27,7 +27,6 @@ from airflow.configuration import conf
 from airflow.utils.usage_data_collection import (
     get_database_version,
     get_python_version,
-    to_bucket,
     usage_data_collection,
 )
 
@@ -101,20 +100,3 @@ def test_get_database_version(version_info, 
expected_version):
 def test_get_python_version(version_info, expected_version):
     with mock.patch("platform.python_version", return_value=version_info):
         assert get_python_version() == expected_version
-
-
-@pytest.mark.parametrize(
-    "counter, expected_bucket",
-    [
-        (0, "0"),
-        (1, "1-5"),
-        (5, "1-5"),
-        (6, "6-10"),
-        (11, "11-20"),
-        (20, "11-20"),
-        (21, "21-50"),
-        (10000, "2000+"),
-    ],
-)
-def test_to_bucket(counter, expected_bucket):
-    assert to_bucket(counter) == expected_bucket
diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py
index 3cc6a87571..01d1bb731e 100644
--- a/tests/www/views/test_views.py
+++ b/tests/www/views/test_views.py
@@ -25,7 +25,6 @@ from unittest.mock import patch
 import pytest
 from markupsafe import Markup
 
-from airflow import __version__ as airflow_version
 from airflow.configuration import (
     initialize_config,
     write_default_airflow_configuration_if_needed,
@@ -36,7 +35,6 @@ from airflow.utils.docs import get_doc_url_for_provider
 from airflow.utils.task_group import TaskGroup
 from airflow.www.views import (
     ProviderView,
-    build_scarf_url,
     get_key_paths,
     get_safe_url,
     get_task_stats_from_query,
@@ -597,39 +595,3 @@ def test_invalid_dates(app, admin_client, url, content):
 
     assert resp.status_code == 400
     assert re.search(content, resp.get_data().decode())
-
-
-@pytest.mark.parametrize("enabled", [False, True])
-@patch("airflow.utils.usage_data_collection.get_platform_info", 
return_value=("Linux", "x86_64"))
-@patch("airflow.utils.usage_data_collection.get_database_version", 
return_value="12.3")
-@patch("airflow.utils.usage_data_collection.get_database_name", 
return_value="postgres")
-@patch("airflow.utils.usage_data_collection.get_executor", 
return_value="SequentialExecutor")
-@patch("airflow.utils.usage_data_collection.get_python_version", 
return_value="3.8")
-@patch("airflow.utils.usage_data_collection.get_plugin_counts")
-def test_build_scarf_url(
-    get_plugin_counts,
-    get_python_version,
-    get_executor,
-    get_database_name,
-    get_database_version,
-    get_platform_info,
-    enabled,
-):
-    get_plugin_counts.return_value = {
-        "plugins": 10,
-        "flask_blueprints": 15,
-        "appbuilder_views": 20,
-        "appbuilder_menu_items": 25,
-        "timetables": 30,
-    }
-    with patch("airflow.settings.is_usage_data_collection_enabled", 
return_value=enabled):
-        result = build_scarf_url(5)
-        expected_url = (
-            "https://apacheairflow.gateway.scarf.sh/webserver/"
-            
f"{airflow_version}/3.8/Linux/x86_64/postgres/12.3/SequentialExecutor/1-5"
-            f"/6-10/15/20/25/21-50"
-        )
-        if enabled:
-            assert result == expected_url
-        else:
-            assert result == ""
diff --git a/tests/www/views/test_views_home.py 
b/tests/www/views/test_views_home.py
index 6f0a7b8cbd..531748b988 100644
--- a/tests/www/views/test_views_home.py
+++ b/tests/www/views/test_views_home.py
@@ -454,20 +454,6 @@ def test_sorting_home_view(url, lower_key, greater_key, 
user_client, working_dag
     assert lower_index < greater_index
 
 
-@pytest.mark.parametrize("is_enabled, should_have_pixel", [(False, False), (True, True)])
-def test_analytics_pixel(user_client, is_enabled, should_have_pixel):
-    """
-    Test that the analytics pixel is not included when the feature is disabled
-    """
-    with mock.patch("airflow.settings.is_usage_data_collection_enabled", 
return_value=is_enabled):
-        resp = user_client.get("home", follow_redirects=True)
-
-    if should_have_pixel:
-        check_content_in_response("apacheairflow.gateway.scarf.sh", resp)
-    else:
-        check_content_not_in_response("apacheairflow.gateway.scarf.sh", resp)
-
-
 @pytest.mark.parametrize(
     "url, filter_tags_cookie_val, filter_lastrun_cookie_val, 
expected_filter_tags, expected_filter_lastrun",
     [

Reply via email to