This is an automated email from the ASF dual-hosted git repository.
kaxilnaik pushed a commit to branch v2-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v2-10-test by this push:
new 2860f81d7e Remove Scarf analytics from Airflow Webserver (#43346)
(#43348)
2860f81d7e is described below
commit 2860f81d7e0e822531ebc90f6c9b4882f8ae82de
Author: Kaxil Naik <[email protected]>
AuthorDate: Thu Oct 24 15:04:14 2024 +0100
Remove Scarf analytics from Airflow Webserver (#43346) (#43348)
Since we have already decided what we want to do for Plugins
in Airflow 3 and are revamping the entire UI, this data isn't that important. There
were also concerns raised about de-duplication and other things.
(cherry picked from commit dc25301f35e78f5058bca9eb8436ec31dc3a2962)
---
airflow/utils/usage_data_collection.py | 24 ----------------
airflow/www/templates/airflow/dags.html | 3 --
airflow/www/views.py | 47 +------------------------------
docs/apache-airflow/faq.rst | 3 --
tests/utils/test_usage_data_collection.py | 18 ------------
tests/www/views/test_views.py | 38 -------------------------
tests/www/views/test_views_home.py | 14 ---------
7 files changed, 1 insertion(+), 146 deletions(-)
diff --git a/airflow/utils/usage_data_collection.py
b/airflow/utils/usage_data_collection.py
index def8ce983a..fe86a2da1c 100644
--- a/airflow/utils/usage_data_collection.py
+++ b/airflow/utils/usage_data_collection.py
@@ -33,7 +33,6 @@ from packaging.version import parse
from airflow import __version__ as airflow_version, settings
from airflow.configuration import conf
-from airflow.plugins_manager import get_plugin_info
def usage_data_collection():
@@ -97,26 +96,3 @@ def get_executor() -> str:
def get_python_version() -> str:
# Cut only major+minor from the python version string (e.g. 3.10.12 -->
3.10)
return ".".join(platform.python_version().split(".")[0:2])
-
-
-def get_plugin_counts() -> dict[str, int]:
- plugin_info = get_plugin_info()
-
- return {
- "plugins": len(plugin_info),
- "flask_blueprints": sum(len(x["flask_blueprints"]) for x in
plugin_info),
- "appbuilder_views": sum(len(x["appbuilder_views"]) for x in
plugin_info),
- "appbuilder_menu_items": sum(len(x["appbuilder_menu_items"]) for x in
plugin_info),
- "timetables": sum(len(x["timetables"]) for x in plugin_info),
- }
-
-
-def to_bucket(counter: int) -> str:
- """As we don't want to have preceise numbers, make number into a bucket."""
- if counter == 0:
- return "0"
- buckets = [0, 5, 10, 20, 50, 100, 200, 500, 1000, 2000]
- for idx, val in enumerate(buckets[1:]):
- if buckets[idx] < counter and counter <= val:
- return f"{buckets[idx] + 1}-{val}"
- return f"{buckets[-1]}+"
diff --git a/airflow/www/templates/airflow/dags.html
b/airflow/www/templates/airflow/dags.html
index d02b70a396..08a0d58a14 100644
--- a/airflow/www/templates/airflow/dags.html
+++ b/airflow/www/templates/airflow/dags.html
@@ -488,7 +488,4 @@
return false;
}
</script>
- {% if scarf_url %}
- <img referrerpolicy="no-referrer" src="{{ scarf_url }}" width="0" height="0"
alt="" style="display:none;" />
- {% endif %}
{% endblock %}
diff --git a/airflow/www/views.py b/airflow/www/views.py
index af60d0aa58..5e8ef6bb7f 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -118,7 +118,7 @@ from airflow.ti_deps.dependencies_deps import
SCHEDULER_QUEUED_DEPS
from airflow.timetables._cron import CronMixin
from airflow.timetables.base import DataInterval, TimeRestriction
from airflow.timetables.simple import ContinuousTimetable
-from airflow.utils import json as utils_json, timezone, usage_data_collection,
yaml
+from airflow.utils import json as utils_json, timezone, yaml
from airflow.utils.airflow_flask_app import get_airflow_app
from airflow.utils.dag_edges import dag_edges
from airflow.utils.db import get_query_count
@@ -219,45 +219,6 @@ def get_safe_url(url):
return redirect_url.geturl()
-def build_scarf_url(dags_count: int) -> str:
- """
- Build the URL for the Scarf usage data collection.
-
- :meta private:
- """
- if not settings.is_usage_data_collection_enabled():
- return ""
-
- scarf_domain = "https://apacheairflow.gateway.scarf.sh"
- platform_sys, platform_arch = usage_data_collection.get_platform_info()
- db_version = usage_data_collection.get_database_version()
- db_name = usage_data_collection.get_database_name()
- executor = usage_data_collection.get_executor()
- python_version = usage_data_collection.get_python_version()
- plugin_counts = usage_data_collection.get_plugin_counts()
- plugins_count = plugin_counts["plugins"]
- flask_blueprints_count = plugin_counts["flask_blueprints"]
- appbuilder_views_count = plugin_counts["appbuilder_views"]
- appbuilder_menu_items_count = plugin_counts["appbuilder_menu_items"]
- timetables_count = plugin_counts["timetables"]
- dag_bucket = usage_data_collection.to_bucket(dags_count)
- plugins_bucket = usage_data_collection.to_bucket(plugins_count)
- timetable_bucket = usage_data_collection.to_bucket(timetables_count)
-
- # Path Format:
- #
/{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}/{plugin_count}/{flask_blueprint_count}/{appbuilder_view_count}/{appbuilder_menu_item_count}/{timetables}
- #
- # This path redirects to a Pixel tracking URL
- scarf_url = (
- f"{scarf_domain}/webserver"
- f"/{version}/{python_version}"
-
f"/{platform_sys}/{platform_arch}/{db_name}/{db_version}/{executor}/{dag_bucket}"
-
f"/{plugins_bucket}/{flask_blueprints_count}/{appbuilder_views_count}/{appbuilder_menu_items_count}/{timetable_bucket}"
- )
-
- return scarf_url
-
-
def get_date_time_num_runs_dag_runs_form_data(www_request, session, dag):
"""Get Execution Data, Base Date & Number of runs from a Request."""
date_time = www_request.args.get("execution_date")
@@ -1129,11 +1090,6 @@ class Airflow(AirflowBaseView):
"warning",
)
- try:
- scarf_url = build_scarf_url(dags_count=all_dags_count)
- except Exception:
- scarf_url = ""
-
return self.render_template(
"airflow/dags.html",
dags=dags,
@@ -1173,7 +1129,6 @@ class Airflow(AirflowBaseView):
sorting_direction=arg_sorting_direction,
auto_refresh_interval=conf.getint("webserver",
"auto_refresh_interval"),
dataset_triggered_next_run_info=dataset_triggered_next_run_info,
- scarf_url=scarf_url,
file_tokens=file_tokens,
)
diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst
index 791cfc39a0..6021ba514a 100644
--- a/docs/apache-airflow/faq.rst
+++ b/docs/apache-airflow/faq.rst
@@ -545,6 +545,3 @@ The telemetry data collected is limited to the following:
- Operating system & machine architecture
- Executor
- Metadata DB type & its version
-- Number of DAGs
-- Number of Airflow plugins
-- Number of timetables, Flask blueprints, Flask AppBuilder views, and Flask
Appbuilder menu items from Airflow plugins
diff --git a/tests/utils/test_usage_data_collection.py
b/tests/utils/test_usage_data_collection.py
index b104d1bfe3..bc97367208 100644
--- a/tests/utils/test_usage_data_collection.py
+++ b/tests/utils/test_usage_data_collection.py
@@ -27,7 +27,6 @@ from airflow.configuration import conf
from airflow.utils.usage_data_collection import (
get_database_version,
get_python_version,
- to_bucket,
usage_data_collection,
)
@@ -101,20 +100,3 @@ def test_get_database_version(version_info,
expected_version):
def test_get_python_version(version_info, expected_version):
with mock.patch("platform.python_version", return_value=version_info):
assert get_python_version() == expected_version
-
-
[email protected](
- "counter, expected_bucket",
- [
- (0, "0"),
- (1, "1-5"),
- (5, "1-5"),
- (6, "6-10"),
- (11, "11-20"),
- (20, "11-20"),
- (21, "21-50"),
- (10000, "2000+"),
- ],
-)
-def test_to_bucket(counter, expected_bucket):
- assert to_bucket(counter) == expected_bucket
diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py
index 3cc6a87571..01d1bb731e 100644
--- a/tests/www/views/test_views.py
+++ b/tests/www/views/test_views.py
@@ -25,7 +25,6 @@ from unittest.mock import patch
import pytest
from markupsafe import Markup
-from airflow import __version__ as airflow_version
from airflow.configuration import (
initialize_config,
write_default_airflow_configuration_if_needed,
@@ -36,7 +35,6 @@ from airflow.utils.docs import get_doc_url_for_provider
from airflow.utils.task_group import TaskGroup
from airflow.www.views import (
ProviderView,
- build_scarf_url,
get_key_paths,
get_safe_url,
get_task_stats_from_query,
@@ -597,39 +595,3 @@ def test_invalid_dates(app, admin_client, url, content):
assert resp.status_code == 400
assert re.search(content, resp.get_data().decode())
-
-
[email protected]("enabled", [False, True])
-@patch("airflow.utils.usage_data_collection.get_platform_info",
return_value=("Linux", "x86_64"))
-@patch("airflow.utils.usage_data_collection.get_database_version",
return_value="12.3")
-@patch("airflow.utils.usage_data_collection.get_database_name",
return_value="postgres")
-@patch("airflow.utils.usage_data_collection.get_executor",
return_value="SequentialExecutor")
-@patch("airflow.utils.usage_data_collection.get_python_version",
return_value="3.8")
-@patch("airflow.utils.usage_data_collection.get_plugin_counts")
-def test_build_scarf_url(
- get_plugin_counts,
- get_python_version,
- get_executor,
- get_database_name,
- get_database_version,
- get_platform_info,
- enabled,
-):
- get_plugin_counts.return_value = {
- "plugins": 10,
- "flask_blueprints": 15,
- "appbuilder_views": 20,
- "appbuilder_menu_items": 25,
- "timetables": 30,
- }
- with patch("airflow.settings.is_usage_data_collection_enabled",
return_value=enabled):
- result = build_scarf_url(5)
- expected_url = (
- "https://apacheairflow.gateway.scarf.sh/webserver/"
-
f"{airflow_version}/3.8/Linux/x86_64/postgres/12.3/SequentialExecutor/1-5"
- f"/6-10/15/20/25/21-50"
- )
- if enabled:
- assert result == expected_url
- else:
- assert result == ""
diff --git a/tests/www/views/test_views_home.py
b/tests/www/views/test_views_home.py
index 6f0a7b8cbd..531748b988 100644
--- a/tests/www/views/test_views_home.py
+++ b/tests/www/views/test_views_home.py
@@ -454,20 +454,6 @@ def test_sorting_home_view(url, lower_key, greater_key,
user_client, working_dag
assert lower_index < greater_index
[email protected]("is_enabled, should_have_pixel", [(False, False),
(True, True)])
-def test_analytics_pixel(user_client, is_enabled, should_have_pixel):
- """
- Test that the analytics pixel is not included when the feature is disabled
- """
- with mock.patch("airflow.settings.is_usage_data_collection_enabled",
return_value=is_enabled):
- resp = user_client.get("home", follow_redirects=True)
-
- if should_have_pixel:
- check_content_in_response("apacheairflow.gateway.scarf.sh", resp)
- else:
- check_content_not_in_response("apacheairflow.gateway.scarf.sh", resp)
-
-
@pytest.mark.parametrize(
"url, filter_tags_cookie_val, filter_lastrun_cookie_val,
expected_filter_tags, expected_filter_lastrun",
[