This is an automated email from the ASF dual-hosted git repository.

dstandish pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new d4a5f4e3a7 Rename `telemetry-collection` to `usage-data-collection` 
(#39673)
d4a5f4e3a7 is described below

commit d4a5f4e3a7eb7acc42ea383fda700c3c28d40bf5
Author: Daniel Standish <[email protected]>
AuthorDate: Thu May 16 15:07:01 2024 -0700

    Rename `telemetry-collection` to `usage-data-collection` (#39673)
    
    The point here is to avoid confusion with the _other_ (and arguably of 
greater importance to users) telemetry concept, namely OTEL / metrics / stats.
    
    While at it, I made the code a little bit more provider-agnostic.
---
 airflow/cli/commands/scheduler_command.py           |  4 ++--
 airflow/config_templates/config.yml                 | 10 +++++-----
 airflow/settings.py                                 |  6 +++---
 .../utils/{scarf.py => usage_data_collection.py}    | 12 ++++++++++--
 airflow/www/views.py                                | 21 ++++++++++++---------
 docs/apache-airflow/faq.rst                         |  8 ++++----
 .../installation/installing-from-pypi.rst           |  5 ++---
 tests/core/test_settings.py                         | 10 +++++-----
 ...{test_scarf.py => test_usage_data_collection.py} | 20 ++++++++++----------
 tests/www/views/test_views.py                       | 12 ++++++------
 tests/www/views/test_views_home.py                  |  2 +-
 11 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/airflow/cli/commands/scheduler_command.py 
b/airflow/cli/commands/scheduler_command.py
index 4f943e961b..2b7c77fda9 100644
--- a/airflow/cli/commands/scheduler_command.py
+++ b/airflow/cli/commands/scheduler_command.py
@@ -33,8 +33,8 @@ from airflow.jobs.scheduler_job_runner import 
SchedulerJobRunner
 from airflow.utils import cli as cli_utils
 from airflow.utils.cli import process_subdir
 from airflow.utils.providers_configuration_loader import 
providers_configuration_loaded
-from airflow.utils.scarf import scarf_analytics
 from airflow.utils.scheduler_health import serve_health_check
+from airflow.utils.usage_data_collection import usage_data_collection
 
 log = logging.getLogger(__name__)
 
@@ -56,7 +56,7 @@ def scheduler(args: Namespace):
     """Start Airflow Scheduler."""
     print(settings.HEADER)
 
-    scarf_analytics()
+    usage_data_collection()
 
     run_command_with_daemon_option(
         args=args,
diff --git a/airflow/config_templates/config.yml 
b/airflow/config_templates/config.yml
index edfe56b45c..36fb176e95 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -2591,10 +2591,10 @@ sensors:
       type: float
       example: ~
       default: "604800"
-telemetry_collection:
+usage_data_collection:
   description: |
-    Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic 
telemetry data during operation.
-    This data assists Airflow maintainers in better understanding how Airflow 
is used.
+    Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic 
platform and usage data
+    during operation. This data assists Airflow maintainers in better 
understanding how Airflow is used.
     Insights gained from this telemetry are critical for prioritizing patches, 
minor releases, and
     security fixes. Additionally, this information supports key decisions 
related to the development road map.
     Check the FAQ doc for more information on what data is collected.
@@ -2607,9 +2607,9 @@ telemetry_collection:
   options:
     enabled:
       description: |
-        Enable or disable telemetry data collection and sending via Scarf.
+        Enable or disable usage data collection and sending.
       version_added: 2.10.0
       type: boolean
       example: ~
       default: "True"
-      see_also: ":ref:`Airflow telemetry FAQ <airflow-telemetry-faq>`"
+      see_also: ":ref:`Usage data collection FAQ <usage-data-collection>`"
diff --git a/airflow/settings.py b/airflow/settings.py
index 176d06270e..50c195f7fd 100644
--- a/airflow/settings.py
+++ b/airflow/settings.py
@@ -576,9 +576,9 @@ def initialize():
     atexit.register(dispose_orm)
 
 
-def is_telemetry_collection_enabled() -> bool:
-    """Check if scarf analytics is enabled."""
-    return conf.getboolean("telemetry_collection", "enabled", fallback=True) 
and (
+def is_usage_data_collection_enabled() -> bool:
+    """Check if data collection is enabled."""
+    return conf.getboolean("usage_data_collection", "enabled", fallback=True) 
and (
         os.getenv("SCARF_ANALYTICS", "").strip().lower() != "false"
     )
 
diff --git a/airflow/utils/scarf.py b/airflow/utils/usage_data_collection.py
similarity index 90%
rename from airflow/utils/scarf.py
rename to airflow/utils/usage_data_collection.py
index ec19480ee7..3736ba22cb 100644
--- a/airflow/utils/scarf.py
+++ b/airflow/utils/usage_data_collection.py
@@ -15,6 +15,14 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
+"""
+This module is for management of Airflow's usage data collection.
+
+This module is not part of the public interface and is subject to change at 
any time.
+
+:meta private:
+"""
+
 from __future__ import annotations
 
 import platform
@@ -27,8 +35,8 @@ from airflow import __version__ as airflow_version, settings
 from airflow.configuration import conf
 
 
-def scarf_analytics():
-    if not settings.is_telemetry_collection_enabled():
+def usage_data_collection():
+    if not settings.is_usage_data_collection_enabled():
         return
 
     # Exclude pre-releases and dev versions
diff --git a/airflow/www/views.py b/airflow/www/views.py
index 606d48e99c..9c4d735547 100644
--- a/airflow/www/views.py
+++ b/airflow/www/views.py
@@ -117,7 +117,7 @@ from airflow.ti_deps.dependencies_deps import 
SCHEDULER_QUEUED_DEPS
 from airflow.timetables._cron import CronMixin
 from airflow.timetables.base import DataInterval, TimeRestriction
 from airflow.timetables.simple import ContinuousTimetable
-from airflow.utils import json as utils_json, scarf, timezone, yaml
+from airflow.utils import json as utils_json, timezone, usage_data_collection, 
yaml
 from airflow.utils.airflow_flask_app import get_airflow_app
 from airflow.utils.dag_edges import dag_edges
 from airflow.utils.db import get_query_count
@@ -218,17 +218,20 @@ def get_safe_url(url):
 
 
 def build_scarf_url(dags_count: int) -> str:
-    """Build the URL for the Scarf telemetry collection."""
-    if not settings.is_telemetry_collection_enabled():
+    """
+    Build the URL for the Scarf usage data collection.
+
+    :meta private:
+    """
+    if not settings.is_usage_data_collection_enabled():
         return ""
 
     scarf_domain = "https://apacheairflow.gateway.scarf.sh"
-
-    platform_sys, platform_arch = scarf.get_platform_info()
-    db_version = scarf.get_database_version()
-    db_name = scarf.get_database_name()
-    executor = scarf.get_executor()
-    python_version = scarf.get_python_version()
+    platform_sys, platform_arch = usage_data_collection.get_platform_info()
+    db_version = usage_data_collection.get_database_version()
+    db_name = usage_data_collection.get_database_name()
+    executor = usage_data_collection.get_executor()
+    python_version = usage_data_collection.get_python_version()
 
     # Path Format:
     # 
/{version}/{python_version}/{platform}/{arch}/{database}/{db_version}/{executor}/{num_dags}
diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst
index 31ec98b9ff..af45139fd5 100644
--- a/docs/apache-airflow/faq.rst
+++ b/docs/apache-airflow/faq.rst
@@ -526,14 +526,14 @@ This means ``explicit_defaults_for_timestamp`` is 
disabled in your mysql server
 Does Airflow collect any telemetry data?
 ----------------------------------------
 
-.. _airflow-telemetry-faq:
+.. _usage-data-collection:
 
-Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic 
telemetry data during operation.
+Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic usage 
data during operation.
 This data assists Airflow maintainers in better understanding how Airflow is 
used.
-Insights gained from this telemetry are critical for prioritizing patches, 
minor releases, and
+Insights gained from this data are helpful for prioritizing patches, minor 
releases, and
 security fixes. Additionally, this information supports key decisions related 
to the development road map.
 
-Deployments can opt-out of analytics by setting the 
:ref:`[telemetry_collection] enabled <config:telemetry_collection__enabled>`
+Deployments can opt-out of data collection by setting the 
:ref:`[usage_data_collection] enabled <config:usage_data_collection__enabled>`
 option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable.
 Individual users can easily opt-out of analytics in various ways documented in 
the
 `Scarf Do Not Track docs <https://docs.scarf.sh/gateway/#do-not-track>`__.
diff --git a/docs/apache-airflow/installation/installing-from-pypi.rst 
b/docs/apache-airflow/installation/installing-from-pypi.rst
index 4751b54112..96758e34e7 100644
--- a/docs/apache-airflow/installation/installing-from-pypi.rst
+++ b/docs/apache-airflow/installation/installing-from-pypi.rst
@@ -333,9 +333,8 @@ dependencies compatible with just airflow core at the 
moment Airflow was release
 
 .. note::
 
-    Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic 
telemetry data during operation.
-    Check the :ref:`Airflow telemetry FAQ <airflow-telemetry-faq>` for more 
information about the data collected
-    and how to opt-out.
+    Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic usage 
data during operation.
+    Check the :ref:`Usage data collection FAQ <usage-data-collection>` for 
more information about the data collected and how to opt-out.
 
 Troubleshooting
 '''''''''''''''
diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py
index c2b4938421..c7df4e8d64 100644
--- a/tests/core/test_settings.py
+++ b/tests/core/test_settings.py
@@ -28,7 +28,7 @@ import pytest
 
 from airflow.api_internal.internal_api_call import InternalApiConfig
 from airflow.exceptions import AirflowClusterPolicyViolation, 
AirflowConfigException
-from airflow.settings import _ENABLE_AIP_44, TracebackSession, 
is_telemetry_collection_enabled
+from airflow.settings import _ENABLE_AIP_44, TracebackSession, 
is_usage_data_collection_enabled
 from airflow.utils.session import create_session
 from tests.test_utils.config import conf_vars
 
@@ -338,12 +338,12 @@ def test_create_session_ctx_mgr_no_call_methods(mock_new, 
clear_internal_api):
         (None, "False", False),  # Default env, conf disables
     ],
 )
-def test_telemetry_collection_disabled(env_var, conf_setting, is_enabled):
-    conf_patch = conf_vars({("telemetry_collection", "enabled"): conf_setting})
+def test_usage_data_collection_disabled(env_var, conf_setting, is_enabled):
+    conf_patch = conf_vars({("usage_data_collection", "enabled"): 
conf_setting})
 
     if env_var is not None:
         with conf_patch, patch.dict(os.environ, {"SCARF_ANALYTICS": env_var}):
-            assert is_telemetry_collection_enabled() == is_enabled
+            assert is_usage_data_collection_enabled() == is_enabled
     else:
         with conf_patch:
-            assert is_telemetry_collection_enabled() == is_enabled
+            assert is_usage_data_collection_enabled() == is_enabled
diff --git a/tests/utils/test_scarf.py 
b/tests/utils/test_usage_data_collection.py
similarity index 76%
rename from tests/utils/test_scarf.py
rename to tests/utils/test_usage_data_collection.py
index 507ce0357b..bb7710e88f 100644
--- a/tests/utils/test_scarf.py
+++ b/tests/utils/test_usage_data_collection.py
@@ -24,27 +24,27 @@ import pytest
 
 from airflow import __version__ as airflow_version
 from airflow.configuration import conf
-from airflow.utils.scarf import get_database_version, scarf_analytics
+from airflow.utils.usage_data_collection import get_database_version, 
usage_data_collection
 
 
 @pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True, 
True)])
 @mock.patch("httpx.get")
 def test_scarf_analytics_disabled(mock_get, is_enabled, is_prerelease):
-    with mock.patch("airflow.settings.is_telemetry_collection_enabled", 
return_value=is_enabled), mock.patch(
-        "airflow.utils.scarf._version_is_prerelease", 
return_value=is_prerelease
+    with mock.patch("airflow.settings.is_usage_data_collection_enabled", 
return_value=is_enabled), mock.patch(
+        "airflow.utils.usage_data_collection._version_is_prerelease", 
return_value=is_prerelease
     ):
-        scarf_analytics()
+        usage_data_collection()
     mock_get.assert_not_called()
 
 
-@mock.patch("airflow.settings.is_telemetry_collection_enabled", 
return_value=True)
-@mock.patch("airflow.utils.scarf._version_is_prerelease", return_value=False)
-@mock.patch("airflow.utils.scarf.get_database_version", return_value="12.3")
-@mock.patch("airflow.utils.scarf.get_database_name", return_value="postgres")
+@mock.patch("airflow.settings.is_usage_data_collection_enabled", 
return_value=True)
+@mock.patch("airflow.utils.usage_data_collection._version_is_prerelease", 
return_value=False)
+@mock.patch("airflow.utils.usage_data_collection.get_database_version", 
return_value="12.3")
+@mock.patch("airflow.utils.usage_data_collection.get_database_name", 
return_value="postgres")
 @mock.patch("httpx.get")
 def test_scarf_analytics(
     mock_get,
-    mock_is_telemetry_collection_enabled,
+    mock_is_usage_data_collection_enabled,
     mock_version_is_prerelease,
     get_database_version,
     get_database_name,
@@ -54,7 +54,7 @@ def test_scarf_analytics(
     python_version = platform.python_version()
     executor = conf.get("core", "EXECUTOR")
     scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
-    scarf_analytics()
+    usage_data_collection()
 
     expected_scarf_url = (
         f"{scarf_endpoint}?version={airflow_version}"
diff --git a/tests/www/views/test_views.py b/tests/www/views/test_views.py
index 527e3ff5e4..067f556bb7 100644
--- a/tests/www/views/test_views.py
+++ b/tests/www/views/test_views.py
@@ -531,11 +531,11 @@ def test_invalid_dates(app, admin_client, url, content):
 
 
 @pytest.mark.parametrize("enabled, dags_count", [(False, 5), (True, 5)])
-@patch("airflow.utils.scarf.get_platform_info", return_value=("Linux", 
"x86_64"))
-@patch("airflow.utils.scarf.get_database_version", return_value="12.3")
-@patch("airflow.utils.scarf.get_database_name", return_value="postgres")
-@patch("airflow.utils.scarf.get_executor", return_value="SequentialExecutor")
-@patch("airflow.utils.scarf.get_python_version", return_value="3.8.5")
+@patch("airflow.utils.usage_data_collection.get_platform_info", 
return_value=("Linux", "x86_64"))
+@patch("airflow.utils.usage_data_collection.get_database_version", 
return_value="12.3")
+@patch("airflow.utils.usage_data_collection.get_database_name", 
return_value="postgres")
+@patch("airflow.utils.usage_data_collection.get_executor", 
return_value="SequentialExecutor")
+@patch("airflow.utils.usage_data_collection.get_python_version", 
return_value="3.8.5")
 def test_build_scarf_url(
     get_platform_info,
     get_database_version,
@@ -545,7 +545,7 @@ def test_build_scarf_url(
     enabled,
     dags_count,
 ):
-    with patch("airflow.settings.is_telemetry_collection_enabled", 
return_value=enabled):
+    with patch("airflow.settings.is_usage_data_collection_enabled", 
return_value=enabled):
         result = build_scarf_url(dags_count)
         expected_url = (
             "https://apacheairflow.gateway.scarf.sh/webserver/"
diff --git a/tests/www/views/test_views_home.py 
b/tests/www/views/test_views_home.py
index 52011c96cf..23f0a80210 100644
--- a/tests/www/views/test_views_home.py
+++ b/tests/www/views/test_views_home.py
@@ -458,7 +458,7 @@ def test_analytics_pixel(user_client, is_enabled, 
should_have_pixel):
     """
     Test that the analytics pixel is not included when the feature is disabled
     """
-    with mock.patch("airflow.settings.is_telemetry_collection_enabled", 
return_value=is_enabled):
+    with mock.patch("airflow.settings.is_usage_data_collection_enabled", 
return_value=is_enabled):
         resp = user_client.get("home", follow_redirects=True)
 
     if should_have_pixel:

Reply via email to