This is an automated email from the ASF dual-hosted git repository. skrawcz pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/burr.git
commit b6b511d019e04f7d943b3f36755cb3303ccff07e Author: Stefan Krawczyk <[email protected]> AuthorDate: Sat Mar 7 22:45:29 2026 -0800 chore: remove phone-home telemetry per ASF policy Remove PostHog usage analytics that collected anonymous data on application builds, execution calls, and CLI commands. ASF projects should not phone home. The burr.telemetry module is kept as a no-op stub (disable_telemetry, is_telemetry_enabled) so existing user code that disables telemetry will not break. --- burr/cli/__main__.py | 11 +- burr/core/application.py | 17 +-- burr/core/graph.py | 2 - burr/telemetry.py | 292 ++----------------------------------------- docs/reference/index.rst | 1 - docs/reference/telemetry.rst | 66 ---------- tests/conftest.py | 4 - 7 files changed, 10 insertions(+), 383 deletions(-) diff --git a/burr/cli/__main__.py b/burr/cli/__main__.py index aeb56f7d..5202eeab 100644 --- a/burr/cli/__main__.py +++ b/burr/cli/__main__.py @@ -31,7 +31,7 @@ from pathlib import Path from types import ModuleType from typing import Optional -from burr import system, telemetry +from burr import system from burr.core.persistence import PersistedStateData from burr.integrations.base import require_plugin from burr.log_setup import setup_logging @@ -50,12 +50,6 @@ except ImportError as e: setup_logging(logging.INFO) -# TODO -- add this as a general callback to the CLI -def _telemetry_if_enabled(event: str): - if telemetry.is_telemetry_enabled(): - telemetry.create_and_send_cli_event(event) - - def _command(command: str, capture_output: bool, addl_env: dict | None = None) -> str: """Runs a simple command""" if addl_env is None: @@ -171,7 +165,6 @@ def _run_server( host: str = "127.0.0.1", backend: str = "local", ): - _telemetry_if_enabled("run_server") # TODO: Implement server running logic here # Example: Start a web server, configure ports, etc. logger.info(f"Starting server on port {port}") @@ -244,7 +237,6 @@ def demo_server(port: int): @click.option("--prod", is_flag=True, help="Publish to pypi (rather than test pypi)") @click.option("--no-wipe-dist", is_flag=True, help="Wipe the dist/ directory before building") def build_and_publish(prod: bool, no_wipe_dist: bool): - _telemetry_if_enabled("build_and_publish") git_root = _get_git_root() with cd(git_root): logger.info("Building UI -- this may take a bit...") @@ -272,7 +264,6 @@ def build_and_publish(prod: bool, no_wipe_dist: bool): @click.option("--unique-app-names", help="Use unique app names", is_flag=True) @click.option("--no-clear-current-data", help="Don't clear current data", is_flag=True) def generate_demo_data(s3_bucket, data_dir, unique_app_names: bool, no_clear_current_data: bool): - _telemetry_if_enabled("generate_demo_data") git_root = _get_git_root() # We need to add the examples directory to the path so we have all the imports # The GPT-one relies on a local import diff --git a/burr/core/application.py b/burr/core/application.py index dc8067c4..855cbdb4 100644 --- a/burr/core/application.py +++ b/burr/core/application.py @@ -44,7 +44,7 @@ from typing import ( cast, ) -from burr import system, telemetry, visibility +from burr import system, visibility from burr.common import types as burr_types from burr.core import persistence, validation from burr.core.action import ( @@ -851,8 +851,6 @@ class Application(Generic[ApplicationStateType]): spawning_parent_pointer=spawning_parent_pointer, ) - # @telemetry.capture_function_usage # todo -- capture usage when we break this up into one that isn't called internally - # This will be doable when we move sequence ID to the beginning of the function https://github.com/DAGWorks-Inc/burr/pull/73 @_call_execute_method_pre_post(ExecuteMethod.step) def step(self, inputs: Optional[Dict[str, Any]] = None) -> Optional[Tuple[Action, dict, State]]: """Performs a single step, advancing the state machine along. @@ -1012,8 +1010,6 @@ class Application(Generic[ApplicationStateType]): ) return processed_inputs - # @telemetry.capture_function_usage - # ditto with step() @_call_execute_method_pre_post(ExecuteMethod.astep) async def astep( self, inputs: Optional[Dict[str, Any]] = None @@ -1198,7 +1194,6 @@ class Application(Generic[ApplicationStateType]): ) return prior_action, result, self._state - @telemetry.capture_function_usage @_call_execute_method_pre_post(ExecuteMethod.iterate) def iterate( self, @@ -1245,7 +1240,6 @@ class Application(Generic[ApplicationStateType]): break return self._return_value_iterate(halt_before, halt_after, prior_action, result) - @telemetry.capture_function_usage @_call_execute_method_pre_post(ExecuteMethod.aiterate) async def aiterate( self, @@ -1277,7 +1271,6 @@ class Application(Generic[ApplicationStateType]): if self._should_halt_iterate(halt_before, halt_after, prior_action): break - @telemetry.capture_function_usage @_call_execute_method_pre_post(ExecuteMethod.run) def run( self, @@ -1306,7 +1299,6 @@ class Application(Generic[ApplicationStateType]): result = e.value return result - @telemetry.capture_function_usage @_call_execute_method_pre_post(ExecuteMethod.arun) async def arun( self, @@ -1338,7 +1330,6 @@ class Application(Generic[ApplicationStateType]): pass return self._return_value_iterate(halt_before, halt_after, prior_action, result) - @telemetry.capture_function_usage def stream_result( self, halt_after: list[str], @@ -1590,7 +1581,6 @@ class Application(Generic[ApplicationStateType]): generator, self._state, process_result, callback ) - @telemetry.capture_function_usage async def astream_result( self, halt_after: list[str], @@ -1863,7 +1853,6 @@ class Application(Generic[ApplicationStateType]): generator, self._state, process_result, callback ) - @telemetry.capture_function_usage @_call_execute_method_pre_post(ExecuteMethod.stream_iterate) def stream_iterate( self, @@ -1907,7 +1896,6 @@ class Application(Generic[ApplicationStateType]): if self._should_halt_iterate(halt_before, halt_after, next_action): break - @telemetry.capture_function_usage @_call_execute_method_pre_post(ExecuteMethod.astream_iterate) async def astream_iterate( self, @@ -1941,7 +1929,6 @@ class Application(Generic[ApplicationStateType]): if self._should_halt_iterate(halt_before, halt_after, next_action): break - @telemetry.capture_function_usage def visualize( self, output_file_path: Optional[str] = None, @@ -2730,7 +2717,6 @@ class ApplicationBuilder(Generic[StateType]): state_initializer=self.state_initializer, ) - @telemetry.capture_function_usage def build(self) -> Application[StateType]: """Builds the application for synchronous runs. @@ -2770,7 +2756,6 @@ class ApplicationBuilder(Generic[StateType]): return self._build_common() - @telemetry.capture_function_usage async def abuild(self) -> Application[StateType]: """Builds the application for asynchronous runs. diff --git a/burr/core/graph.py b/burr/core/graph.py index 521f7c00..87113596 100644 --- a/burr/core/graph.py +++ b/burr/core/graph.py @@ -22,7 +22,6 @@ import logging import pathlib from typing import Any, Callable, List, Literal, Optional, Set, Tuple, Union -from burr import telemetry from burr.core.action import Action, Condition, create_action, default from burr.core.state import State from burr.core.validation import BASE_ERROR_MESSAGE, assert_set @@ -180,7 +179,6 @@ class Graph: ) return self._action_tag_map.get(tag) - @telemetry.capture_function_usage def visualize( self, output_file_path: Optional[Union[str, pathlib.Path]] = None, diff --git a/burr/telemetry.py b/burr/telemetry.py index 791b8504..788d3912 100644 --- a/burr/telemetry.py +++ b/burr/telemetry.py @@ -16,295 +16,19 @@ # under the License. """ -This module contains code that relates to sending Burr usage telemetry. +Telemetry has been removed from Burr per ASF policy. -To disable sending telemetry there are three ways: - -1. Set it to false programmatically in your driver: - >>> from burr import telemetry - >>> telemetry.disable_telemetry() -2. Set it to `false` in ~/.burr.conf under `DEFAULT` - [DEFAULT] - telemetry_enabled = False -3. Set BURR_TELEMETRY_ENABLED=false as an environment variable: - BURR_TELEMETRY_ENABLED=false python run.py - or: - export BURR_TELEMETRY_ENABLED=false +This module is kept as a backwards-compatible no-op stub so that +any external code that imports from it (e.g. ``telemetry.disable_telemetry()``) +will continue to work without error. """ -import configparser -import functools -import importlib.metadata -import json -import logging -import os -import platform -import threading -import uuid -from typing import TYPE_CHECKING, Callable, List, TypeVar -from urllib import request - -if TYPE_CHECKING: - from burr.lifecycle import internal - -try: - VERSION = importlib.metadata.version("apache-burr") -except importlib.metadata.PackageNotFoundError: - # Fallback for older installations or development - VERSION = importlib.metadata.version("burr") - -logger = logging.getLogger(__name__) - -STR_VERSION = ".".join([str(i) for i in VERSION]) -HOST = "https://app.posthog.com" -TRACK_URL = f"{HOST}/capture/" # https://posthog.com/docs/api/post-only-endpoints -API_KEY = "phc_qMa4hWDdTruKaDb4Oa0tK0i1SKf69xf81OCFzjX6z4U" -APPLICATION_FUNCTION = "os_burr_application_function_call" -CLI_COMMAND = "os_burr_cli_command" -TIMEOUT = 2 -MAX_COUNT_SESSION = 10 # max number of events collected per python process - -DEFAULT_CONFIG_LOCATION = os.path.expanduser("~/.burr.conf") - - -def _load_config(config_location: str) -> configparser.ConfigParser: - """Pulls config. Gets/sets default anonymous ID. - - Creates the anonymous ID if it does not exist, writes it back if so. - :param config_location: location of the config file. - """ - config = configparser.ConfigParser() - try: - with open(config_location) as f: - config.read_file(f) - except Exception: - config["DEFAULT"] = {} - else: - if "DEFAULT" not in config: - config["DEFAULT"] = {} - - if "anonymous_id" not in config["DEFAULT"]: - config["DEFAULT"]["anonymous_id"] = str(uuid.uuid4()) - try: - with open(config_location, "w") as f: - config.write(f) - except Exception: - pass - return config - - -def _check_config_and_environ_for_telemetry_flag( - telemetry_default: bool, config_obj: configparser.ConfigParser -): - """Checks the config and environment variables for the telemetry value. - - Note: the environment variable has greater precedence than the config value. - """ - telemetry_enabled = telemetry_default - if "telemetry_enabled" in config_obj["DEFAULT"]: - try: - telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") - except ValueError as e: - logger.debug( - "Unable to parse value for `telemetry_enabled` from config. " f"Encountered {e}" - ) - if os.environ.get("BURR_TELEMETRY_ENABLED") is not None: - env_value = os.environ.get("BURR_TELEMETRY_ENABLED") - # set the value - config_obj["DEFAULT"]["telemetry_enabled"] = env_value - try: - telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") - except ValueError as e: - logger.debug( - "Unable to parse value for `BURR_TELEMETRY_ENABLED` from environment. " - f"Encountered {e}" - ) - return telemetry_enabled - - -config = _load_config(DEFAULT_CONFIG_LOCATION) -g_telemetry_enabled = _check_config_and_environ_for_telemetry_flag(True, config) -g_anonymous_id = config["DEFAULT"]["anonymous_id"] -call_counter = 0 - def disable_telemetry(): - """Disables telemetry tracking.""" - global g_telemetry_enabled - g_telemetry_enabled = False + """No-op. Telemetry has been removed.""" + pass def is_telemetry_enabled() -> bool: - """Returns whether telemetry tracking is enabled or not. - - Increments a counter to stop sending telemetry after 1000 invocations. - """ - if g_telemetry_enabled: - global call_counter - if call_counter == 0: - # Log only the first time someone calls this function; don't want to spam them. - logger.info( - "Note: Burr collects completely anonymous data about usage. " - "This will help us improve Burr over time. " - "See https://burr.apache.org/reference/telemetry/ for details." - ) - call_counter += 1 - if call_counter > MAX_COUNT_SESSION: - # we have hit our limit -- disable telemetry. - return False - return True - else: - return False - - -# base properties to instantiate on module load. -BASE_PROPERTIES = { - "os_type": os.name, - "os_version": platform.platform(), - "python_version": f"{platform.python_version()}/{platform.python_implementation()}", - "distinct_id": g_anonymous_id, - "burr_version": VERSION, - "telemetry_version": "0.0.1", -} - - -def create_application_function_run_event(function_name: str) -> dict: - """Function to create payload for tracking function name invocation. - - :param function_name: the name of the driver function - :return: dict representing the JSON to send. - """ - event = { - "api_key": API_KEY, - "event": APPLICATION_FUNCTION, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - payload = { - "function_name": function_name, # what was the name of the driver function? - } - event["properties"].update(payload) - return event - - -def _send_event_json(event_json: dict): - """Internal function to send the event JSON to posthog. - - :param event_json: the dictionary of data to JSON serialize and send - """ - headers = { - "Content-Type": "application/json", - "Authorization": "TODO", - "User-Agent": f"burr/{STR_VERSION}", - } - try: - data = json.dumps(event_json).encode() - req = request.Request(TRACK_URL, data=data, headers=headers) - with request.urlopen(req, timeout=TIMEOUT) as f: - res = f.read() - if f.code != 200: - raise RuntimeError(res) - except Exception as e: - if logger.isEnabledFor(logging.DEBUG): - logging.debug(f"Failed to send telemetry data: {e}") - else: - if logger.isEnabledFor(logging.DEBUG): - logging.debug(f"Succeed in sending telemetry consisting of [{data}].") - - -def send_event_json(event_json: dict): - """Sends the event json in its own thread. - - :param event_json: the data to send - """ - if not g_telemetry_enabled: - raise RuntimeError("Won't send; tracking is disabled!") - try: - th = threading.Thread(target=_send_event_json, args=(event_json,)) - th.start() - except Exception as e: - # capture any exception! - if logger.isEnabledFor(logging.DEBUG): - logger.debug(f"Encountered error while sending event JSON via it's own thread:\n{e}") - - -def get_all_adapters_names(adapter: "internal.LifecycleAdapterSet") -> List[str]: - """Gives a list of all adapter names in the LifecycleAdapterSet. - Simply a loop over the adapters it contains. - - :param adapter: LifecycleAdapterSet object. - :return: list of adapter names. - """ - adapters = adapter.adapters - out = [] - for adapter in adapters: - out.append(get_adapter_name(adapter)) - return out - - -def get_adapter_name(adapter: "internal.LifecycleAdapter") -> str: - """Get the class name of the ``burr`` adapter used. - - If we detect it's not a Burr one, we do not track it. - - :param adapter: lifecycle.internal.LifecycleAdapter object. - :return: string module + class name of the adapter. - """ - # Check whether it's a burr based adapter - if adapter.__module__.startswith("burr."): - adapter_name = f"{adapter.__module__}.{adapter.__class__.__name__}" - else: - adapter_name = "custom_adapter" - return adapter_name - - -def create_and_send_cli_event(command: str): - """Function that creates JSON and sends to track CLI usage. - - :param command: the CLI command run. - """ - event = { - "api_key": API_KEY, - "event": CLI_COMMAND, - "properties": {}, - } - event["properties"].update(BASE_PROPERTIES) - - payload = { - "command": command, - } - event["properties"].update(payload) - send_event_json(event) - - -CallableT = TypeVar("CallableT", bound=Callable) - - -def capture_function_usage(call_fn: CallableT) -> CallableT: - """Decorator to wrap some application functions for telemetry capture. - - We want to use this for non-execute functions. - We don't capture information about the arguments at this stage, - just the function name. - - :param call_fn: the Driver function to capture. - :return: wrapped function. - """ - - @functools.wraps(call_fn) - def wrapped_fn(*args, **kwargs): - try: - return call_fn(*args, **kwargs) - finally: - if is_telemetry_enabled(): - try: - function_name = call_fn.__name__ - event_json = create_application_function_run_event(function_name) - send_event_json(event_json) - except Exception as e: - if logger.isEnabledFor(logging.DEBUG): - logger.error( - f"Failed to send telemetry for function usage. Encountered: {e}\n" - ) - - return wrapped_fn + """Always returns False. Telemetry has been removed.""" + return False diff --git a/docs/reference/index.rst b/docs/reference/index.rst index 614a53f7..d734972e 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -43,4 +43,3 @@ need functionality that is not publicly exposed, please open an issue and we can parallelism typing integrations/index - telemetry diff --git a/docs/reference/telemetry.rst b/docs/reference/telemetry.rst deleted file mode 100644 index cbdf5be3..00000000 --- a/docs/reference/telemetry.rst +++ /dev/null @@ -1,66 +0,0 @@ -.. - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. - - -============================== -Usage analytics + data privacy -============================== - -By default, when using Burr, it collects anonymous usage data to help improve Burr and know where to apply development efforts. - -We capture events on the following occasions: - -1. When an application is built -2. When one of the ``execution`` functions is run in ``Application`` -3. When a CLI command is run - -The captured data is limited to: - -- Operating System and Python version -- A persistent UUID to indentify the session, stored in ~/.burr.conf. -- The name of the function/CLI command that was run - -If you're worried, see ``telemetry.py`` for details. - -If you do not wish to participate, one can opt-out with one of the following methods: - -1. Set it to false programmatically in your code before creating a Burr application builder: - -.. code-block:: python - - from burr import telemetry - telemetry.disable_telemetry() - -2. Set the key telemetry_enabled to false in ``~/.burr.conf`` under the DEFAULT section: - -.. code-block:: ini - - [DEFAULT] - telemetry_enabled = False - -3. Set BURR_TELEMETRY_ENABLED=false as an environment variable. Either setting it for your shell session: - -.. code-block:: bash - - export BURR_TELEMETRY_ENABLED=false - -or passing it as part of the run command: - -.. code-block:: bash - - BURR_TELEMETRY_ENABLED=false python NAME_OF_MY_DRIVER.py diff --git a/tests/conftest.py b/tests/conftest.py index 1a9d702a..13a83393 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,7 +14,3 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. - -from burr.telemetry import disable_telemetry - -disable_telemetry()
