This is an automated email from the ASF dual-hosted git repository.
dstandish pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new d4a5f4e3a7 Rename `telemetry-collection` to `usage-data-collection`
(#39673)
d4a5f4e3a7 is described below
commit d4a5f4e3a7eb7acc42ea383fda700c3c28d40bf5
Author: Daniel Standish <[email protected]>
AuthorDate: Thu May 16 15:07:01 2024 -0700
Rename `telemetry-collection` to `usage-data-collection` (#39673)
The point here is to avoid confusion with the _other_ (and arguably more
important to users) telemetry concept, namely OTEL / metrics / stats.
While at it, I made the code a little more provider-agnostic.
---
airflow/cli/commands/scheduler_command.py | 4 ++--
airflow/config_templates/config.yml | 10 +++++-----
airflow/settings.py | 6 +++---
.../utils/{scarf.py => usage_data_collection.py} | 12 ++++++++++--
airflow/www/views.py | 21 ++++++++++++---------
docs/apache-airflow/faq.rst | 8 ++++----
.../installation/installing-from-pypi.rst | 5 ++---
tests/core/test_settings.py | 10 +++++-----
...{test_scarf.py => test_usage_data_collection.py} | 20 ++++++++++----------
tests/www/views/test_views.py | 12 ++++++------
tests/www/views/test_views_home.py | 2 +-
11 files changed, 60 insertions(+), 50 deletions(-)
diff --git a/airflow/cli/commands/scheduler_command.py
b/airflow/cli/commands/scheduler_command.py
index 4f943e961b..2b7c77fda9 100644
--- a/airflow/cli/commands/scheduler_command.py
+++ b/airflow/cli/commands/scheduler_command.py
@@ -33,8 +33,8 @@ from airflow.jobs.scheduler_job_runner import
SchedulerJobRunner
from airflow.utils import cli as cli_utils
from airflow.utils.cli import process_subdir
from airflow.utils.providers_configuration_loader import
providers_configuration_loaded
-from airflow.utils.scarf import scarf_analytics
from airflow.utils.scheduler_health import serve_health_check
+from airflow.utils.usage_data_collection import usage_data_collection
log = logging.getLogger(__name__)
@@ -56,7 +56,7 @@ def scheduler(args: Namespace):
"""Start Airflow Scheduler."""
print(settings.HEADER)
- scarf_analytics()
+ usage_data_collection()
run_command_with_daemon_option(
args=args,
diff --git a/airflow/config_templates/config.yml
b/airflow/config_templates/config.yml
index edfe56b45c..36fb176e95 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -2591,10 +2591,10 @@ sensors:
type: float
example: ~
default: "604800"
-telemetry_collection:
+usage_data_collection:
description: |
- Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic
telemetry data during operation.
- This data assists Airflow maintainers in better understanding how Airflow
is used.
+ Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic
platform and usage data
+ during operation. This data assists Airflow maintainers in better
understanding how Airflow is used.
Insights gained from this telemetry are critical for prioritizing patches,
minor releases, and
security fixes. Additionally, this information supports key decisions
related to the development road map.
Check the FAQ doc for more information on what data is collected.
@@ -2607,9 +2607,9 @@ telemetry_collection:
options:
enabled:
description: |
- Enable or disable telemetry data collection and sending via Scarf.
+ Enable or disable usage data collection and sending.
version_added: 2.10.0
type: boolean
example: ~
default: "True"
- see_also: ":ref:`Airflow telemetry FAQ <airflow-telemetry-faq>`"
+ see_also: ":ref:`Usage data collection FAQ <usage-data-collection>`"
diff --git a/airflow/settings.py b/airflow/settings.py
index 176d06270e..50c195f7fd 100644
--- a/airflow/settings.py
+++ b/airflow/settings.py
@@ -576,9 +576,9 @@ def initialize():
atexit.register(dispose_orm)
-def is_telemetry_collection_enabled() -> bool:
- """Check if scarf analytics is enabled."""
- return conf.getboolean("telemetry_collection", "enabled", fallback=True)
and (
+def is_usage_data_collection_enabled() -> bool:
+ """Check if data collection is enabled."""
+ return conf.getboolean("usage_data_collection", "enabled", fallback=True)
and (
os.getenv("SCARF_ANALYTICS", "").strip().lower() != "false"
)
diff --git a/airflow/utils/scarf.py b/airflow/utils/usage_data_collection.py
similarity index 90%
rename from airflow/utils/scarf.py
rename to airflow/utils/usage_data_collection.py
index ec19480ee7..3736ba22cb 100644
--- a/airflow/utils/scarf.py
+++ b/airflow/utils/usage_data_collection.py
@@ -15,6 +15,14 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+"""
+This module is for management of Airflow's usage data collection.
+
+This module is not part of the public interface and is subject to change at
any time.
+
+:meta private:
+"""
+
from __future__ import annotations
import platform
@@ -27,8 +35,8 @@ from airflow import __version__ as airflow_version, settings
from airflow.configuration import conf
-def scarf_analytics():
- if not settings.is_telemetry_collection_enabled():
+def usage_data_collection():
+ if not settings.is_usage_data_collection_enabled():
return
# Exclude pre-releases and dev versions
diff --git a/airflow/www/views.py b/airflow/www/views.py
index 606d48e99c..9c4d735547 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -117,7 +117,7 @@ from airflow.ti_deps.dependencies_deps import
SCHEDULER_QUEUED_DEPS
from airflow.timetables._cron import CronMixin
from airflow.timetables.base import DataInterval, TimeRestriction
from airflow.timetables.simple import ContinuousTimetable
-from airflow.utils import json as utils_json, scarf, timezone, yaml
+from airflow.utils import json as utils_json, timezone, usage_data_collection,
yaml
from airflow.utils.airflow_flask_app import get_airflow_app
from airflow.utils.dag_edges import dag_edges
from airflow.utils.db import get_query_count
@@ -218,17 +218,20 @@ def get_safe_url(url):
def build_scarf_url(dags_count: int) -> str:
- """Build the URL for the Scarf telemetry collection."""
- if not settings.is_telemetry_collection_enabled():
+ """
+ Build the URL for the Scarf usage data collection.
+
+ :meta private:
+ """
+ if not settings.is_usage_data_collection_enabled():
return ""
scarf_domain = "https://apacheairflow.gateway.scarf.sh"
-
- platform_sys, platform_arch = scarf.get_platform_info()
- db_version = scarf.get_database_version()
- db_name = scarf.get_database_name()
- executor = scarf.get_executor()
- python_version = scarf.get_python_version()
+ platform_sys, platform_arch = usage_data_collection.get_platform_info()
+ db_version = usage_data_collection.get_database_version()
+ db_name = usage_data_collection.get_database_name()
+ executor = usage_data_collection.get_executor()
+ python_version = usage_data_collection.get_python_version()
# Path Format:
#
/{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}
diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst
index 31ec98b9ff..af45139fd5 100644
--- a/docs/apache-airflow/faq.rst
+++ b/docs/apache-airflow/faq.rst
@@ -526,14 +526,14 @@ This means ``explicit_defaults_for_timestamp`` is
disabled in your mysql server
Does Airflow collect any telemetry data?
----------------------------------------
-.. _airflow-telemetry-faq:
+.. _usage-data-collection:
-Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic
telemetry data during operation.
+Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic usage
data during operation.
This data assists Airflow maintainers in better understanding how Airflow is
used.
-Insights gained from this telemetry are critical for prioritizing patches,
minor releases, and
+Insights gained from this data are helpful for prioritizing patches, minor
releases, and
security fixes. Additionally, this information supports key decisions related
to the development road map.
-Deployments can opt-out of analytics by setting the
:ref:`[telemetry_collection] enabled <config:telemetry_collection__enabled>`
+Deployments can opt-out of data collection by setting the
:ref:`[usage_data_collection] enabled <config:usage_data_collection__enabled>`
option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable.
Individual users can easily opt-out of analytics in various ways documented in
the
`Scarf Do Not Track docs <https://docs.scarf.sh/gateway/#do-not-track>`__.
diff --git a/docs/apache-airflow/installation/installing-from-pypi.rst
b/docs/apache-airflow/installation/installing-from-pypi.rst
index 4751b54112..96758e34e7 100644
--- a/docs/apache-airflow/installation/installing-from-pypi.rst
+++ b/docs/apache-airflow/installation/installing-from-pypi.rst
@@ -333,9 +333,8 @@ dependencies compatible with just airflow core at the
moment Airflow was release
.. note::
- Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic
telemetry data during operation.
- Check the :ref:`Airflow telemetry FAQ <airflow-telemetry-faq>` for more
information about the data collected
- and how to opt-out.
+ Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic usage
data during operation.
+ Check the :ref:`Usage data collection FAQ <usage-data-collection>` for
more information about the data collected and how to opt-out.
Troubleshooting
'''''''''''''''
diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py
index c2b4938421..c7df4e8d64 100644
--- a/tests/core/test_settings.py
+++ b/tests/core/test_settings.py
@@ -28,7 +28,7 @@ import pytest
from airflow.api_internal.internal_api_call import InternalApiConfig
from airflow.exceptions import AirflowClusterPolicyViolation,
AirflowConfigException
-from airflow.settings import _ENABLE_AIP_44, TracebackSession,
is_telemetry_collection_enabled
+from airflow.settings import _ENABLE_AIP_44, TracebackSession,
is_usage_data_collection_enabled
from airflow.utils.session import create_session
from tests.test_utils.config import conf_vars
@@ -338,12 +338,12 @@ def test_create_session_ctx_mgr_no_call_methods(mock_new,
clear_internal_api):
(None, "False", False), # Default env, conf disables
],
)
-def test_telemetry_collection_disabled(env_var, conf_setting, is_enabled):
- conf_patch = conf_vars({("telemetry_collection", "enabled"): conf_setting})
+def test_usage_data_collection_disabled(env_var, conf_setting, is_enabled):
+ conf_patch = conf_vars({("usage_data_collection", "enabled"):
conf_setting})
if env_var is not None:
with conf_patch, patch.dict(os.environ, {"SCARF_ANALYTICS": env_var}):
- assert is_telemetry_collection_enabled() == is_enabled
+ assert is_usage_data_collection_enabled() == is_enabled
else:
with conf_patch:
- assert is_telemetry_collection_enabled() == is_enabled
+ assert is_usage_data_collection_enabled() == is_enabled
diff --git a/tests/utils/test_scarf.py
b/tests/utils/test_usage_data_collection.py
similarity index 76%
rename from tests/utils/test_scarf.py
rename to tests/utils/test_usage_data_collection.py
index 507ce0357b..bb7710e88f 100644
--- a/tests/utils/test_scarf.py
+++ b/tests/utils/test_usage_data_collection.py
@@ -24,27 +24,27 @@ import pytest
from airflow import __version__ as airflow_version
from airflow.configuration import conf
-from airflow.utils.scarf import get_database_version, scarf_analytics
+from airflow.utils.usage_data_collection import get_database_version,
usage_data_collection
@pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True,
True)])
@mock.patch("httpx.get")
def test_scarf_analytics_disabled(mock_get, is_enabled, is_prerelease):
- with mock.patch("airflow.settings.is_telemetry_collection_enabled",
return_value=is_enabled), mock.patch(
- "airflow.utils.scarf._version_is_prerelease",
return_value=is_prerelease
+ with mock.patch("airflow.settings.is_usage_data_collection_enabled",
return_value=is_enabled), mock.patch(
+ "airflow.utils.usage_data_collection._version_is_prerelease",
return_value=is_prerelease
):
- scarf_analytics()
+ usage_data_collection()
mock_get.assert_not_called()
[email protected]("airflow.settings.is_telemetry_collection_enabled",
return_value=True)
[email protected]("airflow.utils.scarf._version_is_prerelease", return_value=False)
[email protected]("airflow.utils.scarf.get_database_version", return_value="12.3")
[email protected]("airflow.utils.scarf.get_database_name", return_value="postgres")
[email protected]("airflow.settings.is_usage_data_collection_enabled",
return_value=True)
[email protected]("airflow.utils.usage_data_collection._version_is_prerelease",
return_value=False)
[email protected]("airflow.utils.usage_data_collection.get_database_version",
return_value="12.3")
[email protected]("airflow.utils.usage_data_collection.get_database_name",
return_value="postgres")
@mock.patch("httpx.get")
def test_scarf_analytics(
mock_get,
- mock_is_telemetry_collection_enabled,
+ mock_is_usage_data_collection_enabled,
mock_version_is_prerelease,
get_database_version,
get_database_name,
@@ -54,7 +54,7 @@ def test_scarf_analytics(
python_version = platform.python_version()
executor = conf.get("core", "EXECUTOR")
scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
- scarf_analytics()
+ usage_data_collection()
expected_scarf_url = (
f"{scarf_endpoint}?version={airflow_version}"
diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py
index 527e3ff5e4..067f556bb7 100644
--- a/tests/www/views/test_views.py
+++ b/tests/www/views/test_views.py
@@ -531,11 +531,11 @@ def test_invalid_dates(app, admin_client, url, content):
@pytest.mark.parametrize("enabled, dags_count", [(False, 5), (True, 5)])
-@patch("airflow.utils.scarf.get_platform_info", return_value=("Linux",
"x86_64"))
-@patch("airflow.utils.scarf.get_database_version", return_value="12.3")
-@patch("airflow.utils.scarf.get_database_name", return_value="postgres")
-@patch("airflow.utils.scarf.get_executor", return_value="SequentialExecutor")
-@patch("airflow.utils.scarf.get_python_version", return_value="3.8.5")
+@patch("airflow.utils.usage_data_collection.get_platform_info",
return_value=("Linux", "x86_64"))
+@patch("airflow.utils.usage_data_collection.get_database_version",
return_value="12.3")
+@patch("airflow.utils.usage_data_collection.get_database_name",
return_value="postgres")
+@patch("airflow.utils.usage_data_collection.get_executor",
return_value="SequentialExecutor")
+@patch("airflow.utils.usage_data_collection.get_python_version",
return_value="3.8.5")
def test_build_scarf_url(
get_platform_info,
get_database_version,
@@ -545,7 +545,7 @@ def test_build_scarf_url(
enabled,
dags_count,
):
- with patch("airflow.settings.is_telemetry_collection_enabled",
return_value=enabled):
+ with patch("airflow.settings.is_usage_data_collection_enabled",
return_value=enabled):
result = build_scarf_url(dags_count)
expected_url = (
"https://apacheairflow.gateway.scarf.sh/webserver/"
diff --git a/tests/www/views/test_views_home.py
b/tests/www/views/test_views_home.py
index 52011c96cf..23f0a80210 100644
--- a/tests/www/views/test_views_home.py
+++ b/tests/www/views/test_views_home.py
@@ -458,7 +458,7 @@ def test_analytics_pixel(user_client, is_enabled,
should_have_pixel):
"""
Test that the analytics pixel is not included when the feature is disabled
"""
- with mock.patch("airflow.settings.is_telemetry_collection_enabled",
return_value=is_enabled):
+ with mock.patch("airflow.settings.is_usage_data_collection_enabled",
return_value=is_enabled):
resp = user_client.get("home", follow_redirects=True)
if should_have_pixel: