This is an automated email from the ASF dual-hosted git repository.

kaxilnaik pushed a commit to branch v2-10-test
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/v2-10-test by this push:
     new 80a19904606 Remove Scarf tracking (#45865) (#45941)
80a19904606 is described below

commit 80a19904606ecbfd1a6158cb733fbb7161f4d35f
Author: Kaxil Naik <kaxiln...@apache.org>
AuthorDate: Thu Jan 23 03:16:17 2025 +0530

    Remove Scarf tracking (#45865) (#45941)
---
 README.md                                          |   3 -
 RELEASE_NOTES.rst                                  |  11 +-
 airflow/cli/commands/scheduler_command.py          |   3 -
 airflow/config_templates/config.yml                |  22 ----
 airflow/reproducible_build.yaml                    |   4 +-
 airflow/settings.py                                |   7 --
 airflow/utils/usage_data_collection.py             | 123 ---------------------
 docs/apache-airflow/faq.rst                        |  23 ----
 .../installation/installing-from-pypi.rst          |   6 -
 tests/core/test_settings.py                        |  25 +----
 tests/utils/test_usage_data_collection.py          | 104 -----------------
 11 files changed, 8 insertions(+), 323 deletions(-)

diff --git a/README.md b/README.md
index 8da91a71f9a..0273ecbddb2 100644
--- a/README.md
+++ b/README.md
@@ -534,6 +534,3 @@ The CI infrastructure for Apache Airflow has been sponsored by:
 
 <a href="https://astronomer.io"><img src="https://assets2.astronomer.io/logos/logoForLIGHTbackground.png" alt="astronomer.io" width="250px"></a>
 <a href="https://aws.amazon.com/opensource/"><img src="docs/integration-logos/aws/aws-cloud-alt_light...@4x.png" alt="AWS OpenSource" width="130px"></a>
-
-<!-- telemetry/analytics pixel: -->
-<img referrerpolicy="no-referrer-when-downgrade" src="https://static.scarf.sh/a.png?x-pxid=1b5a5e3c-da81-42f5-befa-42d836bf1b54" alt="Tracking Pixel" />
diff --git a/RELEASE_NOTES.rst b/RELEASE_NOTES.rst
index cb7572626b4..9ff9aeb5c4f 100644
--- a/RELEASE_NOTES.rst
+++ b/RELEASE_NOTES.rst
@@ -223,6 +223,11 @@ Airflow 2.10.0 (2024-08-15)
 Significant Changes
 ^^^^^^^^^^^^^^^^^^^
 
+Scarf based telemetry: Airflow now collect telemetry data (#39510)
+""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
+Airflow integrates Scarf to collect basic usage data during operation. 
Deployments can opt-out of data collection by
+setting the ``[usage_data_collection]enabled`` option to ``False``, or the 
``SCARF_ANALYTICS=false`` environment variable.
+
 Datasets no longer trigger inactive DAGs (#38891)
 """""""""""""""""""""""""""""""""""""""""""""""""
 
@@ -271,12 +276,6 @@ Previously known as hybrid executors, this new feature 
allows Airflow to use mul
 to use a specific executor that suits its needs best. A single DAG can contain 
tasks all using different executors. Please see the Airflow documentation for
 more details. Note: This feature is still experimental. See `documentation on Executor <https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/executor/index.html#using-multiple-executors-concurrently>`_ for a more detailed description.
 
-Scarf based telemetry: Does Airflow collect any telemetry data? (#39510)
-""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
-Airflow integrates Scarf to collect basic usage data during operation. 
Deployments can opt-out of data collection by setting the 
``[usage_data_collection]enabled`` option to False, or the 
SCARF_ANALYTICS=false environment variable.
-See `FAQ on this <https://airflow.apache.org/docs/apache-airflow/stable/faq.html#does-airflow-collect-any-telemetry-data>`_ for more information.
-
-
 New Features
 """"""""""""
 - AIP-61 Hybrid Execution (`AIP-61 <https://github.com/apache/airflow/pulls?q=is%3Apr+label%3Aarea%3Ahybrid-executors+is%3Aclosed+milestone%3A%22Airflow+2.10.0%22>`_)
diff --git a/airflow/cli/commands/scheduler_command.py 
b/airflow/cli/commands/scheduler_command.py
index 96cfe1e2852..37fd399d2e0 100644
--- a/airflow/cli/commands/scheduler_command.py
+++ b/airflow/cli/commands/scheduler_command.py
@@ -33,7 +33,6 @@ from airflow.utils import cli as cli_utils
 from airflow.utils.cli import process_subdir
 from airflow.utils.providers_configuration_loader import 
providers_configuration_loaded
 from airflow.utils.scheduler_health import serve_health_check
-from airflow.utils.usage_data_collection import usage_data_collection
 
 log = logging.getLogger(__name__)
 
@@ -54,8 +53,6 @@ def scheduler(args: Namespace):
     """Start Airflow Scheduler."""
     print(settings.HEADER)
 
-    usage_data_collection()
-
     run_command_with_daemon_option(
         args=args,
         process_name="scheduler",
diff --git a/airflow/config_templates/config.yml 
b/airflow/config_templates/config.yml
index 613c5e3394a..06f51aff786 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -2735,25 +2735,3 @@ sensors:
       type: float
       example: ~
       default: "604800"
-usage_data_collection:
-  description: |
-    Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic 
platform and usage data
-    during operation. This data assists Airflow maintainers in better 
understanding how Airflow is used.
-    Insights gained from this telemetry are critical for prioritizing patches, 
minor releases, and
-    security fixes. Additionally, this information supports key decisions 
related to the development road map.
-    Check the FAQ doc for more information on what data is collected.
-
-    Deployments can opt-out of analytics by setting the ``enabled`` option
-    to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable.
-    Individual users can easily opt-out of analytics in various ways 
documented in the
-    `Scarf Do Not Track docs <https://docs.scarf.sh/gateway/#do-not-track>`__.
-
-  options:
-    enabled:
-      description: |
-        Enable or disable usage data collection and sending.
-      version_added: 2.10.0
-      type: boolean
-      example: ~
-      default: "True"
-      see_also: ":ref:`Usage data collection FAQ <usage-data-collection>`"
diff --git a/airflow/reproducible_build.yaml b/airflow/reproducible_build.yaml
index 9f7c6d5a100..eef20e83cad 100644
--- a/airflow/reproducible_build.yaml
+++ b/airflow/reproducible_build.yaml
@@ -1,2 +1,2 @@
-release-notes-hash: 7be47e2ddbbe1bfbd0d3f572d2b7800a
-source-date-epoch: 1736532824
+release-notes-hash: 4c64543422c2823b475306f5e634d598
+source-date-epoch: 1737575461
diff --git a/airflow/settings.py b/airflow/settings.py
index 7e9626d788f..85d56d4be85 100644
--- a/airflow/settings.py
+++ b/airflow/settings.py
@@ -800,13 +800,6 @@ def initialize():
     atexit.register(dispose_orm)
 
 
-def is_usage_data_collection_enabled() -> bool:
-    """Check if data collection is enabled."""
-    return conf.getboolean("usage_data_collection", "enabled", fallback=True) 
and (
-        os.getenv("SCARF_ANALYTICS", "").strip().lower() != "false"
-    )
-
-
 # Const stuff
 
 KILOBYTE = 1024
diff --git a/airflow/utils/usage_data_collection.py 
b/airflow/utils/usage_data_collection.py
deleted file mode 100644
index 3bdfb180fa9..00000000000
--- a/airflow/utils/usage_data_collection.py
+++ /dev/null
@@ -1,123 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-"""
-This module is for management of Airflow's usage data collection.
-
-This module is not part of the public interface and is subject to change at 
any time.
-
-:meta private:
-"""
-
-from __future__ import annotations
-
-import os
-import platform
-from urllib.parse import urlencode
-
-import httpx
-from packaging.version import parse
-
-from airflow import __version__ as airflow_version, settings
-from airflow.configuration import conf
-
-
-def usage_data_collection():
-    if not settings.is_usage_data_collection_enabled():
-        return
-
-    # Exclude pre-releases and dev versions
-    if _version_is_prerelease(airflow_version):
-        return
-
-    # Exclude CI environments
-    if _is_ci_environ():
-        return
-
-    scarf_domain = "https://apacheairflow.gateway.scarf.sh/scheduler"
-
-    try:
-        platform_sys, arch = get_platform_info()
-
-        params = {
-            "version": airflow_version,
-            "python_version": get_python_version(),
-            "platform": platform_sys,
-            "arch": arch,
-            "database": get_database_name(),
-            "db_version": get_database_version(),
-            "executor": get_executor(),
-        }
-
-        query_string = urlencode(params)
-        scarf_url = f"{scarf_domain}?{query_string}"
-
-        httpx.get(scarf_url, timeout=5.0)
-    except Exception:
-        pass
-
-
-def _version_is_prerelease(version: str) -> bool:
-    return parse(version).is_prerelease
-
-
-def _is_ci_environ() -> bool:
-    """Return True if running in any known CI environment."""
-    if os.getenv("CI") == "true":
-        # Generic CI variable set by many CI systems (GH Actions, Travis, 
GitLab, CircleCI, Jenkins, Heroku)
-        return True
-
-    # Other CI variables set by specific CI systems
-    ci_env_vars = {
-        "CIRCLECI",  # CircleCI
-        "CODEBUILD_BUILD_ID",  # AWS CodeBuild
-        "GITHUB_ACTIONS",  # GitHub Actions
-        "GITLAB_CI",  # GitLab CI
-        "JENKINS_URL",  # Jenkins
-        "TF_BUILD",  # Azure Pipelines
-        "TRAVIS",  # Travis CI
-    }
-
-    return any(var in os.environ for var in ci_env_vars)
-
-
-def get_platform_info() -> tuple[str, str]:
-    return platform.system(), platform.machine()
-
-
-def get_database_version() -> str:
-    if settings.engine is None:
-        return "None"
-
-    version_info = settings.engine.dialect.server_version_info
-    # Example: (1, 2, 3) -> "1.2" (cut only major+minor w/o patch)
-    return ".".join(map(str, version_info[0:2])) if version_info else "None"
-
-
-def get_database_name() -> str:
-    if settings.engine is None:
-        return "None"
-    return settings.engine.dialect.name
-
-
-def get_executor() -> str:
-    return conf.get("core", "EXECUTOR")
-
-
-def get_python_version() -> str:
-    # Cut only major+minor from the python version string (e.g. 3.10.12 --> 
3.10)
-    return ".".join(platform.python_version().split(".")[0:2])
diff --git a/docs/apache-airflow/faq.rst b/docs/apache-airflow/faq.rst
index 6021ba514ad..7cef74c5f44 100644
--- a/docs/apache-airflow/faq.rst
+++ b/docs/apache-airflow/faq.rst
@@ -522,26 +522,3 @@ This means ``explicit_defaults_for_timestamp`` is disabled 
in your mysql server
 
 #. Set ``explicit_defaults_for_timestamp = 1`` under the ``mysqld`` section in 
your ``my.cnf`` file.
 #. Restart the Mysql server.
-
-Does Airflow collect any telemetry data?
-----------------------------------------
-
-.. _usage-data-collection:
-
-Airflow integrates `Scarf <https://about.scarf.sh/>`__ to collect basic usage 
data during operation.
-This data assists Airflow maintainers in better understanding how Airflow is 
used.
-Insights gained from this data are helpful for prioritizing patches, minor 
releases, and
-security fixes. Additionally, this information supports key decisions related 
to the development road map.
-
-Deployments can opt-out of data collection by setting the 
:ref:`[usage_data_collection] enabled <config:usage_data_collection__enabled>`
-option to ``False``, or the ``SCARF_ANALYTICS=false`` environment variable.
-Individual users can easily opt-out of analytics in various ways documented in 
the
-`Scarf Do Not Track docs <https://docs.scarf.sh/gateway/#do-not-track>`__.
-
-The telemetry data collected is limited to the following:
-
-- Airflow version
-- Python version
-- Operating system & machine architecture
-- Executor
-- Metadata DB type & its version
diff --git a/docs/apache-airflow/installation/installing-from-pypi.rst 
b/docs/apache-airflow/installation/installing-from-pypi.rst
index 8c689da5e1f..a62d87d9054 100644
--- a/docs/apache-airflow/installation/installing-from-pypi.rst
+++ b/docs/apache-airflow/installation/installing-from-pypi.rst
@@ -330,12 +330,6 @@ dependencies compatible with just airflow core at the 
moment Airflow was release
     # For example: 
https://raw.githubusercontent.com/apache/airflow/constraints-|version|/constraints-no-providers-3.8.txt
     pip install "apache-airflow==${AIRFLOW_VERSION}" --constraint 
"${CONSTRAINT_URL}"
 
-
-.. note::
-
-    Airflow uses `Scarf <https://about.scarf.sh/>`__ to collect basic usage 
data during operation.
-    Check the :ref:`Usage data collection FAQ <usage-data-collection>` for 
more information about the data collected and how to opt-out.
-
 Troubleshooting
 '''''''''''''''
 
diff --git a/tests/core/test_settings.py b/tests/core/test_settings.py
index 483ef24e25f..3a0c33b08b6 100644
--- a/tests/core/test_settings.py
+++ b/tests/core/test_settings.py
@@ -31,7 +31,7 @@ from airflow.__main__ import configure_internal_api
 from airflow.api_internal.internal_api_call import InternalApiConfig
 from airflow.configuration import conf
 from airflow.exceptions import AirflowClusterPolicyViolation, 
AirflowConfigException
-from airflow.settings import _ENABLE_AIP_44, TracebackSession, 
is_usage_data_collection_enabled
+from airflow.settings import _ENABLE_AIP_44, TracebackSession
 from airflow.utils.session import create_session
 from tests.test_utils.config import conf_vars
 
@@ -368,26 +368,3 @@ def test_create_session_ctx_mgr_no_call_methods(mock_new, 
clear_internal_api):
         assert session == m
     method_calls = [x[0] for x in m.method_calls]
     assert method_calls == []  # commit and close not called when using 
internal API
-
-
-@pytest.mark.parametrize(
-    "env_var, conf_setting, is_enabled",
-    [
-        ("false", "True", False),  # env forces disable
-        ("false", "False", False),  # Both force disable
-        ("False ", "False", False),  # Both force disable
-        ("true", "True", True),  # Both enable
-        ("true", "False", False),  # Conf forces disable
-        (None, "True", True),  # Default env, conf enables
-        (None, "False", False),  # Default env, conf disables
-    ],
-)
-def test_usage_data_collection_disabled(env_var, conf_setting, is_enabled, 
clear_internal_api):
-    conf_patch = conf_vars({("usage_data_collection", "enabled"): 
conf_setting})
-
-    if env_var is not None:
-        with conf_patch, patch.dict(os.environ, {"SCARF_ANALYTICS": env_var}):
-            assert is_usage_data_collection_enabled() == is_enabled
-    else:
-        with conf_patch:
-            assert is_usage_data_collection_enabled() == is_enabled
diff --git a/tests/utils/test_usage_data_collection.py 
b/tests/utils/test_usage_data_collection.py
deleted file mode 100644
index 143bce39eca..00000000000
--- a/tests/utils/test_usage_data_collection.py
+++ /dev/null
@@ -1,104 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-#
-#   http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-from __future__ import annotations
-
-import platform
-from unittest import mock
-
-import pytest
-
-from airflow import __version__ as airflow_version
-from airflow.configuration import conf
-from airflow.utils.usage_data_collection import (
-    get_database_version,
-    get_python_version,
-    usage_data_collection,
-)
-
-
-@pytest.mark.parametrize("is_enabled, is_prerelease", [(False, True), (True, 
True)])
-@mock.patch("httpx.get")
-def test_scarf_analytics_disabled(mock_get, is_enabled, is_prerelease):
-    with mock.patch("airflow.settings.is_usage_data_collection_enabled", 
return_value=is_enabled), mock.patch(
-        "airflow.utils.usage_data_collection._version_is_prerelease", 
return_value=is_prerelease
-    ):
-        usage_data_collection()
-    mock_get.assert_not_called()
-
-
-@mock.patch("airflow.settings.is_usage_data_collection_enabled", 
return_value=True)
-@mock.patch("airflow.utils.usage_data_collection._version_is_prerelease", 
return_value=False)
-@mock.patch("airflow.utils.usage_data_collection._is_ci_environ", 
return_value=False)
-@mock.patch("airflow.utils.usage_data_collection.get_database_version", 
return_value="12.3")
-@mock.patch("airflow.utils.usage_data_collection.get_database_name", 
return_value="postgres")
-@mock.patch("httpx.get")
-def test_scarf_analytics(
-    mock_get,
-    mock_is_usage_data_collection_enabled,
-    mock_version_is_ci,
-    mock_version_is_prerelease,
-    get_database_version,
-    get_database_name,
-):
-    platform_sys = platform.system()
-    platform_machine = platform.machine()
-    python_version = get_python_version()
-    executor = conf.get("core", "EXECUTOR")
-    scarf_endpoint = "https://apacheairflow.gateway.scarf.sh/scheduler"
-    usage_data_collection()
-
-    expected_scarf_url = (
-        f"{scarf_endpoint}?version={airflow_version}"
-        f"&python_version={python_version}"
-        f"&platform={platform_sys}"
-        f"&arch={platform_machine}"
-        f"&database=postgres"
-        f"&db_version=12.3"
-        f"&executor={executor}"
-    )
-
-    mock_get.assert_called_once_with(expected_scarf_url, timeout=5.0)
-
-
-@pytest.mark.skip_if_database_isolation_mode
-@pytest.mark.db_test
-@pytest.mark.parametrize(
-    "version_info, expected_version",
-    [
-        ((1, 2, 3), "1.2"),  # Normal version tuple
-        (None, "None"),  # No version info available
-        ((1,), "1"),  # Single element version tuple
-        ((1, 2, 3, "beta", 4), "1.2"),  # Complex version tuple with strings
-    ],
-)
-def test_get_database_version(version_info, expected_version):
-    with mock.patch("airflow.settings.engine.dialect.server_version_info", 
new=version_info):
-        assert get_database_version() == expected_version
-
-
-@pytest.mark.parametrize(
-    "version_info, expected_version",
-    [
-        ("1.2.3", "1.2"),  # Normal version
-        ("4", "4"),  # Single element version
-        ("1.2.3.beta4", "1.2"),  # Complex version tuple with strings
-    ],
-)
-def test_get_python_version(version_info, expected_version):
-    with mock.patch("platform.python_version", return_value=version_info):
-        assert get_python_version() == expected_version

Reply via email to