This is an automated email from the ASF dual-hosted git repository.
skrawcz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hamilton.git
The following commit(s) were added to refs/heads/main by this push:
new 45673b02 Remove usage telemetry (PostHog) from Hamilton (#1512)
45673b02 is described below
commit 45673b020f84f23eb982d0a563fe057cd971aeab
Author: Stefan Krawczyk <[email protected]>
AuthorDate: Sun Mar 8 15:25:13 2026 -0700
Remove usage telemetry (PostHog) from Hamilton (#1512)
* Remove usage telemetry (PostHog) from Hamilton
ASF policy does not permit phone-home telemetry in released artifacts.
This removes all PostHog event tracking while keeping hamilton/telemetry.py
as a backwards-compatible no-op stub so downstream code that calls
`telemetry.disable_telemetry()` continues to work without changes.
* Add deprecation decorators to telemetry stub functions
Mark disable_telemetry() and is_telemetry_enabled() as deprecated
(warn at 1.89.0, fail at 2.0.0) so users get a clear signal to
remove these calls before the next major release.
---
contrib/hamilton/contrib/__init__.py | 10 -
docs/get-started/license.rst | 23 --
docs/index.md | 1 -
docs/reference/disabling-telemetry.md | 40 --
hamilton/async_driver.py | 67 +---
hamilton/cli/__main__.py | 3 -
hamilton/contrib/__init__.py | 8 -
hamilton/dataflows/__init__.py | 35 +-
hamilton/driver.py | 138 +------
hamilton/plugins/h_experiments/__main__.py | 5 -
hamilton/registry.py | 2 -
hamilton/telemetry.py | 549 ++------------------------
plugin_tests/h_dask/conftest.py | 5 -
plugin_tests/h_narwhals/conftest.py | 21 -
plugin_tests/h_pandas/conftest.py | 21 -
plugin_tests/h_polars/conftest.py | 21 -
plugin_tests/h_ray/conftest.py | 5 -
plugin_tests/h_spark/conftest.py | 21 -
plugin_tests/h_vaex/conftest.py | 21 -
tests/conftest.py | 5 -
tests/test_async_driver.py | 36 --
tests/test_hamilton_driver.py | 188 +--------
tests/test_telemetry.py | 237 -----------
ui/backend/server/trackingserver_base/apps.py | 35 --
ui/sdk/tests/conftest.py | 20 -
25 files changed, 37 insertions(+), 1480 deletions(-)
diff --git a/contrib/hamilton/contrib/__init__.py b/contrib/hamilton/contrib/__init__.py
index 951292d1..1d6df3cf 100644
--- a/contrib/hamilton/contrib/__init__.py
+++ b/contrib/hamilton/contrib/__init__.py
@@ -23,22 +23,12 @@ try:
except ImportError:
from version import VERSION as __version__ # noqa: F401
-from hamilton import telemetry
-
-
-def track(module_name: str):
- """Function to call to track module usage."""
- if hasattr(telemetry, "create_and_send_contrib_use"): # makes sure Hamilton version is fine.
- telemetry.create_and_send_contrib_use(module_name, __version__)
-
@contextmanager
def catch_import_errors(module_name: str, file_location: str, logger:
logging.Logger):
try:
# Yield control to the inner block which will have the import
statements.
yield
- # After all imports succeed send telemetry
- track(module_name)
except ImportError as e:
location = file_location[: file_location.rfind("/")]
logger.error("ImportError: %s", e)
diff --git a/docs/get-started/license.rst b/docs/get-started/license.rst
index 71576cc6..132cfaca 100644
--- a/docs/get-started/license.rst
+++ b/docs/get-started/license.rst
@@ -3,26 +3,3 @@ License
=======
Apache Hamilton is released under the `Apache 2.0 License
<https://github.com/apache/hamilton/blob/main/LICENSE>`_.
-
-
-
-Usage analytics & data privacy
------------------------------------
-By default, when using Apache Hamilton, it collects anonymous usage data to
help improve Apache Hamilton and know where to apply development
-efforts.
-
-We capture three types of events: one when the `Driver` object is
instantiated, one when the `execute()` call on the \
-`Driver` object completes, and one for most `Driver` object function
invocations.
-No user data or potentially sensitive information is or ever will be
collected. The captured data is limited to:
-
-* Operating System and Python version
-* A persistent UUID to indentify the session, stored in ~/.hamilton.conf.
-* Error stack trace limited to Apache Hamilton code, if one occurs.
-* Information on what features you're using from Apache Hamilton: decorators,
adapters, result builders.
-* How Apache Hamilton is being used: number of final nodes in DAG, number of
modules, size of objects passed to `execute()`, \
- the name of the Driver function being invoked.
-
-
-Else see :doc:`/reference/disabling-telemetry` for how to disable telemetry.
-
-Otherwise we invite you to inspect telemetry.py for details.
diff --git a/docs/index.md b/docs/index.md
index 659e4ea4..2c2c25de 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -76,7 +76,6 @@ reference/lifecycle-hooks/index
reference/result-builders/index
reference/io/index
reference/dataflows/index
-reference/disabling-telemetry.md
```
```{toctree}
diff --git a/docs/reference/disabling-telemetry.md b/docs/reference/disabling-telemetry.md
deleted file mode 100644
index 1ab75a9c..00000000
--- a/docs/reference/disabling-telemetry.md
+++ /dev/null
@@ -1,40 +0,0 @@
-<!--
-Licensed to the Apache Software Foundation (ASF) under one
-or more contributor license agreements. See the NOTICE file
-distributed with this work for additional information
-regarding copyright ownership. The ASF licenses this file
-to you under the Apache License, Version 2.0 (the
-"License"); you may not use this file except in compliance
-with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing,
-software distributed under the License is distributed on an
-"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-KIND, either express or implied. See the License for the
-specific language governing permissions and limitations
-under the License.
--->
-
-# Telemetry
-
-If you do not wish to participate in telemetry capture, one can opt-out with
one of the following methods:
-1. Set it to false programmatically in your code before creating a Hamilton
Driver:
- ```python
- from hamilton import telemetry
- telemetry.disable_telemetry()
- ```
-2. Set the key `telemetry_enabled` to `false` in ~/.hamilton.conf under the
`DEFAULT` section:
- ```
- [DEFAULT]
- telemetry_enabled = False
- ```
-3. Set HAMILTON_TELEMETRY_ENABLED=false as an environment variable. Either
setting it for your shell session:
- ```bash
- export HAMILTON_TELEMETRY_ENABLED=false
- ```
- or passing it as part of the run command:
- ```bash
- HAMILTON_TELEMETRY_ENABLED=false python NAME_OF_MY_DRIVER.py
- ```
diff --git a/hamilton/async_driver.py b/hamilton/async_driver.py
index a6c45b61..46f995e7 100644
--- a/hamilton/async_driver.py
+++ b/hamilton/async_driver.py
@@ -18,15 +18,12 @@
import asyncio
import inspect
import logging
-import sys
-import time
import typing
import uuid
-from types import ModuleType
from typing import Any
import hamilton.lifecycle.base as lifecycle_base
-from hamilton import base, driver, graph, lifecycle, node, telemetry
+from hamilton import base, driver, graph, lifecycle, node
from hamilton.execution.graph_functions import create_error_message
from hamilton.io.materialization import ExtractorFactory, MaterializerFactory
@@ -375,9 +372,6 @@ class AsyncDriver(driver.Driver):
"display_graph=True is not supported for the async graph
adapter. "
"Instead you should be using visualize_execution."
)
- start_time = time.time()
- run_successful = True
- error = None
_final_vars = self._create_final_vars(final_vars)
try:
outputs = await self.raw_execute(_final_vars, overrides,
display_graph, inputs=inputs)
@@ -386,67 +380,8 @@ class AsyncDriver(driver.Driver):
return
self.adapter.call_lifecycle_method_sync("do_build_result", outputs=outputs)
return outputs
except Exception as e:
- run_successful = False
logger.error(driver.SLACK_ERROR_MESSAGE)
- error = telemetry.sanitize_error(*sys.exc_info())
raise e
- finally:
- duration = time.time() - start_time
- # ensure we can capture telemetry in async friendly way.
- if telemetry.is_telemetry_enabled():
-
- async def make_coroutine():
- self.capture_execute_telemetry(
- error, final_vars, inputs, overrides, run_successful,
duration
- )
-
- try:
- # we don't have to await because we are running within the
event loop.
- asyncio.create_task(make_coroutine())
- except Exception as e:
- if logger.isEnabledFor(logging.DEBUG):
- logger.error(f"Encountered error submitting async
telemetry:\n{e}")
-
- def capture_constructor_telemetry(
- self,
- error: str | None,
- modules: tuple[ModuleType],
- config: dict[str, Any],
- adapter: base.HamiltonGraphAdapter,
- ):
- """Ensures we capture constructor telemetry the right way in an async
context.
-
- This is a simpler wrapper around what's in the driver class.
-
- :param error: sanitized error string, if any.
- :param modules: tuple of modules to build DAG from.
- :param config: config to create the driver.
- :param adapter: adapter class object.
- """
- if telemetry.is_telemetry_enabled():
- try:
- # check whether the event loop has been started yet or not
- loop = asyncio.get_event_loop()
- if loop.is_running():
- loop.run_in_executor(
- None,
- super(AsyncDriver, self).capture_constructor_telemetry,
- error,
- modules,
- config,
- adapter,
- )
- else:
-
- async def make_coroutine():
- super(AsyncDriver, self).capture_constructor_telemetry(
- error, modules, config, adapter
- )
-
- loop.run_until_complete(make_coroutine())
- except Exception as e:
- if logger.isEnabledFor(logging.DEBUG):
- logger.error(f"Encountered error submitting async
telemetry:\n{e}")
class Builder(driver.Builder):
diff --git a/hamilton/cli/__main__.py b/hamilton/cli/__main__.py
index 13036954..891cce44 100644
--- a/hamilton/cli/__main__.py
+++ b/hamilton/cli/__main__.py
@@ -32,7 +32,6 @@ with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=UserWarning)
from hamilton import driver
-from hamilton import telemetry
from hamilton.cli import commands
logger = logging.getLogger(__name__)
@@ -120,8 +119,6 @@ def main(
] = False,
):
"""Hamilton CLI"""
- if telemetry.is_telemetry_enabled():
- telemetry.create_and_send_cli_event(ctx.invoked_subcommand)
state.verbose = verbose
state.json_out = json_out
logger.debug(f"verbose set to {verbose}")
diff --git a/hamilton/contrib/__init__.py b/hamilton/contrib/__init__.py
index e6d37700..f6bc625f 100644
--- a/hamilton/contrib/__init__.py
+++ b/hamilton/contrib/__init__.py
@@ -25,20 +25,12 @@ from contextlib import contextmanager
__version__ = "__unknown__" # this will be overwritten once
sf-hamilton-contrib is installed.
-from hamilton import telemetry
-
@contextmanager
def catch_import_errors(module_name: str, file_location: str, logger:
logging.Logger):
try:
# Yield control to the inner block which will have the import
statements.
yield
- # After all imports succeed send telemetry
- if "." in module_name:
- telemetry.create_and_send_contrib_use(module_name, __version__)
- else:
- # we are importing it dynamically thus a "package" isn't present
so file_location has the info.
- telemetry.create_and_send_contrib_use(file_location, __version__)
except ImportError as e:
location = file_location[: file_location.rfind("/")]
logger.error("ImportError: %s", e)
diff --git a/hamilton/dataflows/__init__.py b/hamilton/dataflows/__init__.py
index 1fece4ab..3c8c0bfc 100644
--- a/hamilton/dataflows/__init__.py
+++ b/hamilton/dataflows/__init__.py
@@ -22,7 +22,6 @@ dataflows.
TODO: expect this to have a CLI interface in the future.
"""
-import functools
import importlib
import json
import logging
@@ -36,7 +35,7 @@ from collections.abc import Callable
from types import ModuleType
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple,
Type, Union
-from hamilton import driver, telemetry
+from hamilton import driver
if TYPE_CHECKING:
import builtins
@@ -65,37 +64,17 @@ OFFICIAL_PATH = DATAFLOW_FOLDER + "/" + COMMON_PATH +
"/dagworks/{dataflow}"
def _track_function_call(call_fn: Callable) -> Callable:
- """Decorator to wrap the __call__ to count usage.
+ """No-op decorator kept for backwards compatibility.
- :param call_fn: the `__call__` function.
- :return: the wrapped call function.
+ :param call_fn: the function.
+ :return: the same function, unwrapped.
"""
-
- @functools.wraps(call_fn)
- def track_call(*args, **kwargs):
- event_json =
telemetry.create_dataflow_function_invocation_event_json(call_fn.__name__)
- telemetry.send_event_json(event_json)
- return call_fn(*args, **kwargs)
-
- return track_call
+ return call_fn
def _track_download(is_official: bool, user: str | None, dataflow_name: str,
version: str):
- """Inner function to track "downloads" of a dataflow.
-
- :param is_official: is this an official dataflow? False == user.
- :param user: If not official, what is the github user name.
- :param dataflow_name: the name of the dataflow
- :param version: the version. Either git hash, or the package version.
- """
- if is_official:
- category = "DAGWORKS"
- else:
- category = "USER"
- event_json = telemetry.create_dataflow_download_event_json(
- category, user, dataflow_name, version
- )
- telemetry.send_event_json(event_json)
+ """No-op. Telemetry has been removed."""
+ pass
def _get_request(url: str) -> tuple[int, str]:
diff --git a/hamilton/driver.py b/hamilton/driver.py
index 6186aa71..cc043993 100644
--- a/hamilton/driver.py
+++ b/hamilton/driver.py
@@ -16,7 +16,6 @@
# under the License.
import abc
-import functools
import importlib
import importlib.util
import json
@@ -24,7 +23,6 @@ import logging
import operator
import pathlib
import sys
-import time
# required if we want to run this code stand alone.
import typing
@@ -61,41 +59,19 @@ if __name__ == "__main__":
import base
import graph
import node
- import telemetry
else:
- from . import base, graph, node, telemetry
+ from . import base, graph, node
logger = logging.getLogger(__name__)
def capture_function_usage(call_fn: Callable) -> Callable:
- """Decorator to wrap some driver functions for telemetry capture.
+ """No-op decorator kept for backwards compatibility.
- We want to use this for non-constructor and non-execute functions.
- We don't capture information about the arguments at this stage,
- just the function name.
-
- :param call_fn: the Driver function to capture.
- :return: wrapped function.
+ :param call_fn: the Driver function.
+ :return: the same function, unwrapped.
"""
-
- @functools.wraps(call_fn)
- def wrapped_fn(*args, **kwargs):
- try:
- return call_fn(*args, **kwargs)
- finally:
- if telemetry.is_telemetry_enabled():
- try:
- function_name = call_fn.__name__
- event_json =
telemetry.create_driver_function_invocation_event(function_name)
- telemetry.send_event_json(event_json)
- except Exception as e:
- if logger.isEnabledFor(logging.DEBUG):
- logger.error(
- f"Failed to send telemetry for function usage.
Encountered: {e}\n"
- )
-
- return wrapped_fn
+ return call_fn
# This is kept in here for backwards compatibility
@@ -450,7 +426,6 @@ class Driver:
adapter = self.normalize_adapter_input(adapter,
use_legacy_adapter=_use_legacy_adapter)
if adapter.does_hook("pre_do_anything", is_async=False):
adapter.call_all_lifecycle_hooks_sync("pre_do_anything")
- error = None
self.graph_modules = modules
try:
self.graph = graph.FunctionGraph.from_modules(
@@ -477,12 +452,8 @@ class Driver:
self.graph_executor = _graph_executor
self.config = config
except Exception as e:
- error = telemetry.sanitize_error(*sys.exc_info())
logger.error(SLACK_ERROR_MESSAGE)
raise e
- finally:
- # TODO -- update this to use the lifecycle methods
- self.capture_constructor_telemetry(error, modules, config, adapter)
def _repr_mimebundle_(self, include=None, exclude=None, **kwargs):
"""Attribute read by notebook renderers
@@ -494,47 +465,6 @@ class Driver:
dot = self.display_all_functions()
return dot._repr_mimebundle_(include=include, exclude=exclude,
**kwargs)
- def capture_constructor_telemetry(
- self,
- error: str | None,
- modules: tuple[ModuleType],
- config: dict[str, Any],
- adapter: lifecycle_base.LifecycleAdapterSet,
- ):
- """Captures constructor telemetry. Notes:
- (1) we want to do this in a way that does not break.
- (2) we need to account for all possible states, e.g. someone passing
in None, or assuming that
- the entire constructor code ran without issue, e.g. `adapter` was
assigned to `self`.
-
- :param error: the sanitized error string to send.
- :param modules: the list of modules, could be None.
- :param config: the config dict passed, could be None.
- :param adapter: the adapter passed in, might not be attached to `self`
yet.
- """
- if telemetry.is_telemetry_enabled():
- try:
- # adapter_name = telemetry.get_adapter_name(adapter)
- lifecycle_adapter_names =
telemetry.get_all_adapters_names(adapter)
- result_builder = telemetry.get_result_builder_name(adapter)
- # being defensive here with ensuring values exist
- payload = telemetry.create_start_event_json(
- len(self.graph.nodes) if hasattr(self, "graph") else 0,
- len(modules) if modules else 0,
- len(config) if config else 0,
- dict(self.graph.decorator_counter) if hasattr(self,
"graph") else {},
- "deprecated -- see lifecycle_adapters_used",
- lifecycle_adapter_names,
- result_builder,
- self.driver_run_id,
- error,
- self.graph_executor.__class__.__name__,
- )
- telemetry.send_event_json(payload)
- except Exception as e:
- # we don't want this to fail at all!
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug(f"Error caught in processing telemetry: {e}")
-
@staticmethod
def validate_inputs(
fn_graph: graph.FunctionGraph,
@@ -617,11 +547,9 @@ class Driver:
"display_graph=True is deprecated. It will be removed in the
2.0.0 release. "
"Please use visualize_execution()."
)
- start_time = time.time()
run_id = str(uuid.uuid4())
run_successful = True
error_execution = None
- error_telemetry = None
outputs = None
_final_vars = self._create_final_vars(final_vars)
if self.adapter.does_hook("pre_graph_execute", is_async=False):
@@ -647,7 +575,6 @@ class Driver:
run_successful = False
logger.error(SLACK_ERROR_MESSAGE)
error_execution = e
- error_telemetry = telemetry.sanitize_error(*sys.exc_info())
raise e
finally:
if self.adapter.does_hook("post_graph_execute", is_async=False):
@@ -659,10 +586,6 @@ class Driver:
error=error_execution,
results=outputs,
)
- duration = time.time() - start_time
- self.capture_execute_telemetry(
- error_telemetry, _final_vars, inputs, overrides,
run_successful, duration
- )
return outputs
def _create_final_vars(self, final_vars: list[str | Callable | Variable])
-> list[str]:
@@ -675,45 +598,6 @@ class Driver:
_final_vars = common.convert_output_values(final_vars, _module_set)
return _final_vars
- def capture_execute_telemetry(
- self,
- error: str | None,
- final_vars: list[str],
- inputs: dict[str, Any],
- overrides: dict[str, Any],
- run_successful: bool,
- duration: float,
- ):
- """Captures telemetry after execute has run.
-
- Notes:
- (1) we want to be quite defensive in not breaking anyone's code with
things we do here.
- (2) thus we want to double-check that values exist before doing
something with them.
-
- :param error: the sanitized error string to capture, if any.
- :param final_vars: the list of final variables to get.
- :param inputs: the inputs to the execute function.
- :param overrides: any overrides to the execute function.
- :param run_successful: whether this run was successful.
- :param duration: time it took to run execute.
- """
- if telemetry.is_telemetry_enabled():
- try:
- payload = telemetry.create_end_event_json(
- run_successful,
- duration,
- len(final_vars) if final_vars else 0,
- len(overrides) if isinstance(overrides, dict) else 0,
- len(inputs) if isinstance(overrides, dict) else 0,
- self.driver_run_id,
- error,
- )
- telemetry.send_event_json(payload)
- except Exception as e:
- # we don't want this to fail at all!
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug(f"Error caught in processing telemetry:
\n{e}")
-
@deprecation.deprecated(
warn_starting=(1, 0, 0),
fail_starting=(2, 0, 0),
@@ -1646,10 +1530,8 @@ class Driver:
"""
if additional_vars is None:
additional_vars = []
- start_time = time.time()
run_successful = True
error_execution = None
- error_telemetry = None
run_id = str(uuid.uuid4())
outputs = (None, None)
final_vars = self._create_final_vars(additional_vars)
@@ -1707,7 +1589,6 @@ class Driver:
except Exception as e:
run_successful = False
logger.error(SLACK_ERROR_MESSAGE)
- error_telemetry = telemetry.sanitize_error(*sys.exc_info())
error_execution = e
raise e
finally:
@@ -1720,15 +1601,6 @@ class Driver:
error=error_execution,
results=outputs[1],
)
- duration = time.time() - start_time
- self.capture_execute_telemetry(
- error_telemetry,
- final_vars + materializer_vars,
- inputs,
- overrides,
- run_successful,
- duration,
- )
return outputs
@capture_function_usage
diff --git a/hamilton/plugins/h_experiments/__main__.py b/hamilton/plugins/h_experiments/__main__.py
index fa848cff..bb54be4d 100644
--- a/hamilton/plugins/h_experiments/__main__.py
+++ b/hamilton/plugins/h_experiments/__main__.py
@@ -19,7 +19,6 @@ import argparse
import os
from pathlib import Path
-from hamilton import telemetry
from hamilton.plugins.h_experiments.cache import JsonCache
@@ -32,8 +31,6 @@ def main():
raise ModuleNotFoundError(
"Some dependencies are missing. Make sure to `pip install
sf-hamilton[experiments]`"
) from e
- if telemetry.is_telemetry_enabled():
- telemetry.create_and_send_expt_server_event("startup")
parser = argparse.ArgumentParser(prog="hamilton-experiments")
parser.description = "Hamilton Experiment Server launcher"
@@ -59,8 +56,6 @@ def main():
os.environ["HAMILTON_EXPERIMENTS_PATH"] = str(Path(args.path).resolve())
uvicorn.run("hamilton.plugins.h_experiments.server:app", host=args.host,
port=args.port)
- if telemetry.is_telemetry_enabled():
- telemetry.create_and_send_expt_server_event("shutdown")
if __name__ == "__main__":
diff --git a/hamilton/registry.py b/hamilton/registry.py
index 8e5b383c..cda2fa80 100644
--- a/hamilton/registry.py
+++ b/hamilton/registry.py
@@ -53,8 +53,6 @@ ExtensionName = Literal[
]
HAMILTON_EXTENSIONS: tuple[ExtensionName, ...] = get_args(ExtensionName)
HAMILTON_AUTOLOAD_ENV = "HAMILTON_AUTOLOAD_EXTENSIONS"
-# NOTE the variable DEFAULT_CONFIG_LOCAITON is redundant with
`hamilton.telemetry`
-# but this `registry` module must avoid circular imports
DEFAULT_CONFIG_LOCATION = pathlib.Path("~/.hamilton.conf").expanduser()
# This is a dictionary of extension name -> dict with dataframe and column
types.
diff --git a/hamilton/telemetry.py b/hamilton/telemetry.py
index f6623d5e..d157ef72 100644
--- a/hamilton/telemetry.py
+++ b/hamilton/telemetry.py
@@ -16,538 +16,35 @@
# under the License.
"""
-This module contains code that relates to sending Hamilton usage telemetry.
+Telemetry has been removed from Hamilton.
-To disable sending telemetry there are three ways:
-
-1. Set it to false programmatically in your driver:
- >>> from hamilton import telemetry
- >>> telemetry.disable_telemetry()
-2. Set it to `false` in ~/.hamilton.conf under `DEFAULT`
- [DEFAULT]
- telemetry_enabled = True
-3. Set HAMILTON_TELEMETRY_ENABLED=false as an environment variable:
- HAMILTON_TELEMETRY_ENABLED=false python run.py
- or:
- export HAMILTON_TELEMETRY_ENABLED=false
+This module is kept as a no-op stub for backwards compatibility,
+so that any user code calling ``telemetry.disable_telemetry()``
+will not break.
"""
-import configparser
-import json
-import logging
-import os
-import platform
-import threading
-import traceback
-import uuid
-from urllib import request
-
-try:
- from . import base
- from .lifecycle import base as lifecycle_base
- from .version import VERSION
-except ImportError:
- from version import VERSION
-
- from hamilton import base
- from hamilton.lifecycle import base as lifecycle_base
-
-logger = logging.getLogger(__name__)
-
-STR_VERSION = ".".join([str(i) for i in VERSION])
-HOST = "https://app.posthog.com"
-TRACK_URL = f"{HOST}/capture/" #
https://posthog.com/docs/api/post-only-endpoints
-API_KEY = "phc_mZg8bkn3yvMxqvZKRlMlxjekFU5DFDdcdAsijJ2EH5e"
-START_EVENT = "os_hamilton_run_start"
-END_EVENT = "os_hamilton_run_end"
-DRIVER_FUNCTION = "os_hamilton_driver_function_call"
-DATAFLOW_FUNCTION = "os_hamilton_dataflow_function_call"
-DATAFLOW_DOWNLOAD = "os_hamilton_dataflow_download_call"
-DATAFLOW_IMPORT = "os_hamilton_dataflow_import_call"
-CLI_COMMAND = "os_hamilton_cli_command"
-EXPERIMENT_SERVER = "os_hamilton_experiment_server"
-TIMEOUT = 2
-MAX_COUNT_SESSION = 10 # max number of events collected per python process
-DEFAULT_CONFIG_URI = os.environ.get("HAMILTON_CONFIG_URI", "~/.hamilton.conf")
-DEFAULT_CONFIG_LOCATION = os.path.expanduser(DEFAULT_CONFIG_URI)
-
-
-def _load_config(config_location: str) -> configparser.ConfigParser:
- """Pulls config. Gets/sets default anonymous ID.
-
- Creates the anonymous ID if it does not exist, writes it back if so.
- :param config_location: location of the config file.
- """
- config = configparser.ConfigParser()
- try:
- with open(config_location) as f:
- config.read_file(f)
- except Exception:
- config["DEFAULT"] = {}
- else:
- if "DEFAULT" not in config:
- config["DEFAULT"] = {}
-
- if "anonymous_id" not in config["DEFAULT"]:
- config["DEFAULT"]["anonymous_id"] = str(uuid.uuid4())
- try:
- with open(config_location, "w") as f:
- config.write(f)
- except Exception:
- pass
- return config
-
-
-def _check_config_and_environ_for_telemetry_flag(
- telemetry_default: bool, config_obj: configparser.ConfigParser
-):
- """Checks the config and environment variables for the telemetry value.
-
- Note: the environment variable has greater precedence than the config
value.
- """
- telemetry_enabled = telemetry_default
- if "telemetry_enabled" in config_obj["DEFAULT"]:
- try:
- telemetry_enabled = config_obj.getboolean("DEFAULT",
"telemetry_enabled")
- except ValueError as e:
- logger.debug(
- f"Unable to parse value for `telemetry_enabled` from config.
Encountered {e}"
- )
- if os.environ.get("HAMILTON_TELEMETRY_ENABLED") is not None:
- env_value = os.environ.get("HAMILTON_TELEMETRY_ENABLED")
- # set the value
- config_obj["DEFAULT"]["telemetry_enabled"] = env_value
- try:
- telemetry_enabled = config_obj.getboolean("DEFAULT",
"telemetry_enabled")
- except ValueError as e:
- logger.debug(
- "Unable to parse value for `HAMILTON_TELEMETRY_ENABLED` from
environment. "
- f"Encountered {e}"
- )
- return telemetry_enabled
-
-
-config = _load_config(DEFAULT_CONFIG_LOCATION)
-g_telemetry_enabled = _check_config_and_environ_for_telemetry_flag(True,
config)
-g_anonymous_id = config["DEFAULT"]["anonymous_id"]
-call_counter = 0
+from hamilton.dev_utils.deprecation import deprecated
+@deprecated(
+ warn_starting=(1, 89, 0),
+ fail_starting=(2, 0, 0),
+ use_this=None,
+ explanation="Telemetry has been removed from Hamilton. This function is a
no-op.",
+ migration_guide="Simply remove any calls to
`telemetry.disable_telemetry()`.",
+)
def disable_telemetry():
- """Disables telemetry tracking."""
- global g_telemetry_enabled
- g_telemetry_enabled = False
+ """No-op. Telemetry has been removed."""
+ pass
+@deprecated(
+ warn_starting=(1, 89, 0),
+ fail_starting=(2, 0, 0),
+ use_this=None,
+ explanation="Telemetry has been removed from Hamilton. This function
always returns False.",
+ migration_guide="Simply remove any calls to
`telemetry.is_telemetry_enabled()`.",
+)
def is_telemetry_enabled() -> bool:
- """Returns whether telemetry tracking is enabled or not.
-
- Increments a counter to stop sending telemetry after 1000 invocations.
- """
- if g_telemetry_enabled:
- global call_counter
- if call_counter == 0:
- # Log only the first time someone calls this function; don't want
to spam them.
- logger.info(
- "Note: Hamilton collects completely anonymous data about
usage. "
- "This will help us improve Hamilton over time. "
- "See
https://hamilton.apache.org/get-started/license/#usage-analytics-data-privacy"
- " for details."
- )
- call_counter += 1
- if call_counter > MAX_COUNT_SESSION:
- # we have hit our limit -- disable telemetry.
- return False
- return True
- else:
- return False
-
-
-# base properties to instantiate on module load.
-BASE_PROPERTIES = {
- "os_type": os.name,
- "os_version": platform.platform(),
- "python_version":
f"{platform.python_version()}/{platform.python_implementation()}",
- "distinct_id": g_anonymous_id,
- "hamilton_version": list(VERSION),
- "telemetry_version": "0.0.1",
- "$process_person_profile": False,
-}
-
-
-def create_start_event_json(
- number_of_nodes: int,
- number_of_modules: int,
- number_of_config_items: int,
- decorators_used: dict[str, int],
- graph_adapter_used: str,
- lifecycle_adapters_used: list[str],
- result_builder_used: str,
- driver_run_id: uuid.UUID,
- error: str | None,
- graph_executor_class: str,
-):
- """Creates the start event JSON.
-
- The format we want to follow is the one for [post-hog](#
https://posthog.com/docs/api/post-only-endpoints).
-
- :param number_of_nodes: the number of nodes in the graph
- :param number_of_modules: the number of modules parsed
- :param number_of_config_items: the number of items in configuration
- :param decorators_used: a dict of decorator -> count
- :param graph_adapter_used: the name of the graph adapter used
- :param result_builder_used: the name of the result builder used
- :param driver_run_id: the ID of the run
- :param error: an error string if any
- :param driver_class: the name of the driver class used to call this
- :return: dictionary to send.
- """
- event = {
- "api_key": API_KEY,
- "event": START_EVENT,
- "properties": {},
- }
- event["properties"].update(BASE_PROPERTIES)
- payload = {
- "number_of_nodes": number_of_nodes, # approximately how many nodes
were in the DAG?
- "number_of_modules": number_of_modules, # approximately how many
modules were used?
- "number_of_config_items": number_of_config_items, # how many configs
are people passing in?
- "decorators_used": decorators_used, # what decorators were used, and
how many times?
- "graph_adapter_used": graph_adapter_used, # what was the graph
adapter used?
- "result_builder_used": result_builder_used, # what was the result
builder used?
- "driver_run_id": str(driver_run_id), # was this a new driver object?
or?
- "error": error, # if there was an error, what was the trace? (limited
to Hamilton code)
- "graph_executor_class": graph_executor_class, # what driver class was
used to call this
- "lifecycle_adapters_used": lifecycle_adapters_used, # what lifecycle
adapters were used?
- }
- event["properties"].update(payload)
- return event
-
-
-def create_end_event_json(
- is_success: bool,
- runtime_seconds: float,
- number_of_outputs: int,
- number_of_overrides: int,
- number_of_inputs: int,
- driver_run_id: uuid.UUID,
- error: str | None,
-):
- """Creates the end event JSON.
-
- The format we want to follow is the one for [post-hog](#
https://posthog.com/docs/api/post-only-endpoints).
-
- :param is_success: whether execute was successful
- :param runtime_seconds: how long execution took
- :param number_of_outputs: the number of outputs requested
- :param number_of_overrides: the number of overrides provided
- :param number_of_inputs: the number of inputs provided
- :param driver_run_id: the run ID of this driver run
- :param error: the error string if any
- :return: dictionary to send.
- """
- event = {
- "api_key": API_KEY,
- "event": END_EVENT,
- "properties": {},
- }
- event["properties"].update(BASE_PROPERTIES)
- payload = {
- "is_success": is_success, # was this run successful?
- "runtime_seconds": runtime_seconds, # how long did it take
- "number_of_outputs": number_of_outputs, # how many outputs were
requested
- "number_of_overrides": number_of_overrides, # how many outputs were
requested
- "number_of_inputs": number_of_inputs, # how many user provided things
are there
- "driver_run_id": str(driver_run_id), # let's tie this to a particular
driver instantiation
- "error": error, # if there was an error, what was the trace? (limited
to Hamilton code)
- }
- event["properties"].update(payload)
- return event
-
-
-def create_driver_function_invocation_event(function_name: str) -> dict:
- """Function to create payload for tracking function name invocation.
-
- :param function_name: the name of the driver function
- :return: dict representing the JSON to send.
- """
- event = {
- "api_key": API_KEY,
- "event": DRIVER_FUNCTION,
- "properties": {},
- }
- event["properties"].update(BASE_PROPERTIES)
- payload = {
- "function_name": function_name, # what was the name of the driver
function?
- }
- event["properties"].update(payload)
- return event
-
-
-def create_dataflow_function_invocation_event_json(
- canonical_function_name: str,
-) -> dict:
- """Function that creates JSON to track dataflow module function calls.
-
- :param canonical_function_name: the name of the function in the dataflow
module.
- :return: the dictionary representing the event.
- """
- event = {
- "api_key": API_KEY,
- "event": DATAFLOW_FUNCTION,
- "properties": {},
- }
- event["properties"].update(BASE_PROPERTIES)
- payload = {
- "function_name": canonical_function_name, # what was the name of the
driver function?
- }
- event["properties"].update(payload)
- return event
-
-
-def create_dataflow_download_event_json(
- category: str,
- user: str,
- dataflow_name: str,
- version: str,
-) -> dict:
- """Function that creates JSON to track dataflow download calls.
-
- :param category: the category of the dataflow. DAGWORKS or USER.
- :param user: the user's github handle, if applicable.
- :param dataflow_name: the name of the dataflow.
- :param version: the git commit version of the dataflow, OR the
sf-hamilton-contrib package version, or __unknown__.
- :return: dictionary representing the event.
- """
- event = {
- "api_key": API_KEY,
- "event": DATAFLOW_DOWNLOAD,
- "properties": {},
- }
- event["properties"].update(BASE_PROPERTIES)
- _category = "DAGWORKS" if category == "DAGWORKS" else "USER"
-
- payload = {
- "category": _category,
- "dataflow_name": dataflow_name,
- "commit_version": version,
- }
- if _category == "USER":
- payload["github_user"] = user
- event["properties"].update(payload)
- return event
-
-
-def create_and_send_contrib_use(module_name: str, version: str):
- """Function to send contrib module use -- this is used from the contrib
package.
-
- :param module_name: the name of the module, or file location of the code.
- :param version: the package version.
- """
- if module_name == "__main__" or module_name == "__init__":
- return
- try:
- # we need to handle the case that sf-hamilton-contrib is not installed.
- # if that's the case the file location will be the module name.
- if ".py" in module_name:
- contrib_index = module_name.rfind("/contrib/")
- if contrib_index == -1:
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug(
- "Encountered error while constructing
create_and_send_contrib_use."
- )
- return
- parts = module_name[contrib_index:].split(os.sep)[1:-1]
- dataflows_index = module_name.find("/dataflows/")
- # get the commit sha out as the version
- version = module_name[
- dataflows_index + len("/dataflows/") :
module_name.find("/contrib/")
- ]
- else:
- parts = module_name.split(".")
- version = "sf-contrib-" + ".".join(map(str, version))
- if "dagworks" in parts:
- category = "DAGWORKS"
- user = None
- else:
- category = "USER"
- user = parts[-2]
- dataflow = parts[-1]
- event_json = create_dataflow_download_event_json(category, user,
dataflow, version)
- event_json["event"] = DATAFLOW_IMPORT # overwrite the event name.
- except Exception as e:
- # capture any exception!
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug(
- f"Encountered error while constructing
create_and_send_contrib_use json:\n{e}"
- )
- else:
- send_event_json(event_json)
-
-
-def _send_event_json(event_json: dict):
- """Internal function to send the event JSON to posthog.
-
- :param event_json: the dictionary of data to JSON serialize and send
- """
- headers = {
- "Content-Type": "application/json",
- "Authorization": "TODO",
- "User-Agent": f"hamilton/{STR_VERSION}",
- }
- try:
- data = json.dumps(event_json).encode()
- req = request.Request(TRACK_URL, data=data, headers=headers)
- with request.urlopen(req, timeout=TIMEOUT) as f:
- res = f.read()
- if f.code != 200:
- raise RuntimeError(res)
- except Exception as e:
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug(f"Failed to send telemetry data: {e}")
- else:
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug(f"Succeed in sending telemetry consisting of
[{data}].")
-
-
-def send_event_json(event_json: dict):
- """Sends the event json in its own thread.
-
- :param event_json: the data to send
- """
- if not g_telemetry_enabled:
- raise RuntimeError("Won't send; tracking is disabled!")
- try:
- th = threading.Thread(target=_send_event_json, args=(event_json,))
- th.start()
- except Exception as e:
- # capture any exception!
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug(f"Encountered error while sending event JSON via it's
own thread:\n{e}")
-
-
-def sanitize_error(exc_type, exc_value, exc_traceback) -> str:
- """Sanitizes an incoming error and pulls out a string to tell us where it
came from.
-
- :param exc_type: pulled from `sys.exc_info()`
- :param exc_value: pulled from `sys.exc_info()`
- :param exc_traceback: pulled from `sys.exc_info()`
- :return: string to use for telemetry
- """
- try:
- te = traceback.TracebackException(exc_type, exc_value, exc_traceback,
limit=-5)
- sanitized_string = ""
- for stack_item in te.stack:
- stack_file_path = stack_item.filename.split(os.sep)
- # take last 4 places only -- that's how deep hamilton is.
- stack_file_path = stack_file_path[-4:]
- try:
- # find first occurrence
- index = stack_file_path.index("hamilton")
- except ValueError:
- sanitized_string += "...<USER_CODE>...\n"
- continue
- file_name = "..." + "/".join(stack_file_path[index:])
- sanitized_string += f"{file_name}, line {stack_item.lineno}, in
{stack_item.name}\n"
- return sanitized_string
- except Exception as e:
- # we don't want this to fail
- if logger.isEnabledFor(logging.DEBUG):
- logger.debug(f"Encountered exception sanitizing error. Got:\n{e}")
- return "FAILED_TO_SANITIZE_ERROR"
-
-
-def get_all_adapters_names(adapter: lifecycle_base.LifecycleAdapterSet) ->
list[str]:
- """Gives a list of all adapter names in the LifecycleAdapterSet.
- Simply a loop over the adapters it contains.
-
- :param adapter: LifecycleAdapterSet object.
- :return: list of adapter names.
- """
- adapters = adapter.adapters
- out = []
- for adapter in adapters:
- out.append(get_adapter_name(adapter))
- return out
-
-
-def get_adapter_name(adapter: lifecycle_base.LifecycleAdapter) -> str:
- """Get the class name of the `hamilton` adapter used.
-
- If we detect it's not a Hamilton one, we do not track it.
-
- :param adapter: base.HamiltonGraphAdapter object.
- :return: string module + class name of the adapter.
- """
- # Check whether it's a hamilton based adapter
- if adapter.__module__.startswith("hamilton."):
- adapter_name = f"{adapter.__module__}.{adapter.__class__.__name__}"
- else:
- adapter_name = "custom_adapter"
- return adapter_name
-
-
-def get_result_builder_name(adapter: lifecycle_base.LifecycleAdapterSet) ->
str:
- """Get the class name of the `hamilton` result builder used.
-
- If we detect it's not a base one, we do not track it.
-
- :param adapter: base.HamiltonGraphAdapter object.
- :return: string module + class name of the result builder.
- """
- # if there is an attribute, get that out to use as the class to inspect
- result_builders = [item for item in adapter.adapters if hasattr(item,
"build_result")]
- if len(result_builders) == 0:
- result_builder_name = "no_result_builder"
- return result_builder_name
- class_to_inspect = result_builders[0]
- # all_adapters = adapter.adapters
- if hasattr(class_to_inspect, "result_builder"):
- class_to_inspect = class_to_inspect.result_builder
- # Go by class itself
- if isinstance(class_to_inspect, base.StrictIndexTypePandasDataFrameResult):
- result_builder_name =
"hamilton.base.StrictIndexTypePandasDataFrameResult"
- elif isinstance(class_to_inspect, base.PandasDataFrameResult):
- result_builder_name = "hamilton.base.PandasDataFrameResult"
- elif isinstance(class_to_inspect, base.DictResult):
- result_builder_name = "hamilton.base.DictResult"
- elif isinstance(class_to_inspect, base.NumpyMatrixResult):
- result_builder_name = "hamilton.base.NumpyMatrixResult"
- else:
- result_builder_name = "custom_builder"
- return result_builder_name
-
-
-def create_and_send_cli_event(command: str):
- """Function that creates JSON and sends to track CLI usage.
-
- :param command: the CLI command run.
- """
- event = {
- "api_key": API_KEY,
- "event": CLI_COMMAND,
- "properties": {},
- }
- event["properties"].update(BASE_PROPERTIES)
-
- payload = {
- "command": command,
- }
- event["properties"].update(payload)
- send_event_json(event)
-
-
-def create_and_send_expt_server_event(step: str):
- """Function that creates JSON and sends to track experiment server
usage."""
- event = {
- "api_key": API_KEY,
- "event": EXPERIMENT_SERVER,
- "properties": {},
- }
- event["properties"].update(BASE_PROPERTIES)
- if step in ["startup", "shutdown"]:
- payload = {
- "step": step,
- }
- event["properties"].update(payload)
- send_event_json(event)
+ """Always returns False. Telemetry has been removed."""
+ return False
diff --git a/plugin_tests/h_dask/conftest.py b/plugin_tests/h_dask/conftest.py
index 616fb894..182965bd 100644
--- a/plugin_tests/h_dask/conftest.py
+++ b/plugin_tests/h_dask/conftest.py
@@ -15,11 +15,6 @@
# specific language governing permissions and limitations
# under the License.
-from hamilton import telemetry
-
-# disable telemetry for all tests!
-telemetry.disable_telemetry()
-
# dask_expr got made default, except for python 3.9 and below
import sys
diff --git a/plugin_tests/h_narwhals/conftest.py b/plugin_tests/h_narwhals/conftest.py
deleted file mode 100644
index b131ad9a..00000000
--- a/plugin_tests/h_narwhals/conftest.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from hamilton import telemetry
-
-# disable telemetry for all tests!
-telemetry.disable_telemetry()
diff --git a/plugin_tests/h_pandas/conftest.py b/plugin_tests/h_pandas/conftest.py
deleted file mode 100644
index b131ad9a..00000000
--- a/plugin_tests/h_pandas/conftest.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from hamilton import telemetry
-
-# disable telemetry for all tests!
-telemetry.disable_telemetry()
diff --git a/plugin_tests/h_polars/conftest.py b/plugin_tests/h_polars/conftest.py
deleted file mode 100644
index b131ad9a..00000000
--- a/plugin_tests/h_polars/conftest.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from hamilton import telemetry
-
-# disable telemetry for all tests!
-telemetry.disable_telemetry()
diff --git a/plugin_tests/h_ray/conftest.py b/plugin_tests/h_ray/conftest.py
index dd0219c0..2a0690fd 100644
--- a/plugin_tests/h_ray/conftest.py
+++ b/plugin_tests/h_ray/conftest.py
@@ -17,11 +17,6 @@
import sys
-from hamilton import telemetry
-
-# disable telemetry for all tests!
-telemetry.disable_telemetry()
-
# Skip tests that require packages not yet available on Python 3.14
collect_ignore = []
if sys.version_info >= (3, 14):
diff --git a/plugin_tests/h_spark/conftest.py b/plugin_tests/h_spark/conftest.py
deleted file mode 100644
index b131ad9a..00000000
--- a/plugin_tests/h_spark/conftest.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from hamilton import telemetry
-
-# disable telemetry for all tests!
-telemetry.disable_telemetry()
diff --git a/plugin_tests/h_vaex/conftest.py b/plugin_tests/h_vaex/conftest.py
deleted file mode 100644
index b131ad9a..00000000
--- a/plugin_tests/h_vaex/conftest.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from hamilton import telemetry
-
-# disable telemetry for all tests!
-telemetry.disable_telemetry()
diff --git a/tests/conftest.py b/tests/conftest.py
index 0adbe615..7ed250b3 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -17,11 +17,6 @@
import sys
-from hamilton import telemetry
-
-# disable telemetry for all tests!
-telemetry.disable_telemetry()
-
# Skip tests that require packages not yet available on Python 3.14
collect_ignore = []
if sys.version_info >= (3, 14):
diff --git a/tests/test_async_driver.py b/tests/test_async_driver.py
index a0c32879..788ed525 100644
--- a/tests/test_async_driver.py
+++ b/tests/test_async_driver.py
@@ -16,7 +16,6 @@
# under the License.
import asyncio
-from unittest import mock
import pandas as pd
import pytest
@@ -97,41 +96,6 @@ async def test_driver_end_to_end():
}
[email protected]
[email protected]("hamilton.telemetry.send_event_json")
[email protected]("hamilton.telemetry.g_telemetry_enabled", True)
-async def test_driver_end_to_end_telemetry(send_event_json):
- dr = async_driver.AsyncDriver({}, simple_async_module,
result_builder=base.DictResult())
- with mock.patch("hamilton.telemetry.g_telemetry_enabled", False):
- # don't count this telemetry tracking invocation
- all_vars = [var.name for var in dr.list_available_variables() if
var.name != "return_df"]
- result = await dr.execute(final_vars=all_vars, inputs={"external_input":
1})
- result["a"] = result["a"].to_dict()
- result["b"] = result["b"].to_dict()
- assert result == {
- "a": pd.Series([1, 2, 3]).to_dict(),
- "another_async_func": 8,
- "async_func_with_param": 4,
- "b": pd.Series([4, 5, 6]).to_dict(),
- "external_input": 1,
- "non_async_func_with_decorator": {"result_1": 9, "result_2": 5},
- "result_1": 9,
- "result_2": 5,
- "result_3": 1,
- "result_4": 2,
- "return_dict": {"result_3": 1, "result_4": 2},
- "simple_async_func": 2,
- "simple_non_async_func": 7,
- }
- # to ensure the last telemetry invocation finishes executing
- # get all tasks -- and the current task, and await all others.
- tasks = asyncio.all_tasks()
- current_task = asyncio.current_task()
- await asyncio.gather(*[t for t in tasks if t != current_task])
- assert send_event_json.called
- assert len(send_event_json.call_args_list) == 2
-
-
@pytest.mark.asyncio
async def test_async_driver_end_to_end_async_lifecycle_methods():
tracked_calls = []
diff --git a/tests/test_hamilton_driver.py b/tests/test_hamilton_driver.py
index 95b645f1..ffea258f 100644
--- a/tests/test_hamilton_driver.py
+++ b/tests/test_hamilton_driver.py
@@ -15,12 +15,10 @@
# specific language governing permissions and limitations
# under the License.
-from unittest import mock
-import pandas as pd
import pytest
-from hamilton import base, node, telemetry
+from hamilton import base, node
from hamilton.caching.adapter import HamiltonCacheAdapter
from hamilton.driver import (
Builder,
@@ -42,8 +40,6 @@ import tests.resources.test_driver_serde_worker
import tests.resources.test_for_materialization
import tests.resources.very_simple_dag
-telemetry.MAX_COUNT_SESSION = 100
-
"""This file tests driver capabilities.
Anything involving execution is tested for multiple executors/driver configuration.
Anything not involving execution is tested for just the single driver configuration.
@@ -220,188 +216,6 @@ def test_driver_variables_exposes_original_function():
assert originating_functions["a"] == (tests.resources.very_simple_dag.b,)
# a is an input
[email protected]("hamilton.telemetry.send_event_json")
-def test_capture_constructor_telemetry_disabled(send_event_json):
- """Tests that we don't do anything if telemetry is disabled."""
- send_event_json.return_value = ""
- Driver({}, tests.resources.tagging) # this will exercise things
underneath.
- assert send_event_json.called is False
-
-
[email protected]("hamilton.telemetry.get_adapter_name")
[email protected]("hamilton.telemetry.send_event_json")
[email protected]("hamilton.telemetry.g_telemetry_enabled", True)
-def test_capture_constructor_telemetry_error(send_event_json,
get_adapter_name):
- """Tests that we don't error if an exception occurs"""
- get_adapter_name.side_effect = ValueError("TELEMETRY ERROR")
- Driver({}, tests.resources.tagging) # this will exercise things
underneath.
- assert send_event_json.called is False
-
-
[email protected]("hamilton.telemetry.send_event_json")
[email protected]("hamilton.telemetry.g_telemetry_enabled", True)
-def test_capture_constructor_telemetry_none_values(send_event_json):
- """Tests that we don't error if there are none values"""
- Driver({}, None, None) # this will exercise things underneath.
- assert send_event_json.called is True
-
-
[email protected]("hamilton.telemetry.send_event_json")
[email protected]("hamilton.telemetry.g_telemetry_enabled", True)
-def test_capture_constructor_telemetry(send_event_json):
- """Tests that we send an event if we could. Validates deterministic
parts."""
- Driver({}, tests.resources.very_simple_dag)
- # assert send_event_json.called is True
- assert len(send_event_json.call_args_list) == 1 # only called once
- # check contents of what it was called with:
- send_event_json_call = send_event_json.call_args_list[0]
- actual_event_dict = send_event_json_call[0][0]
- assert actual_event_dict["api_key"] ==
"phc_mZg8bkn3yvMxqvZKRlMlxjekFU5DFDdcdAsijJ2EH5e"
- assert actual_event_dict["event"] == "os_hamilton_run_start"
- # validate schema
- expected_properties = {
- "$process_person_profile",
- "os_type",
- "os_version",
- "python_version",
- "distinct_id",
- "hamilton_version",
- "telemetry_version",
- "number_of_nodes",
- "number_of_modules",
- "number_of_config_items",
- "decorators_used",
- "graph_adapter_used",
- "result_builder_used",
- "driver_run_id",
- "error",
- "graph_executor_class",
- "lifecycle_adapters_used",
- }
- actual_properties = actual_event_dict["properties"]
- assert set(actual_properties.keys()) == expected_properties
- # validate static parts
- assert actual_properties["error"] is None
- assert actual_properties["number_of_nodes"] == 2 # b, and input a
- assert actual_properties["number_of_modules"] == 1
- assert actual_properties["number_of_config_items"] == 0
- assert actual_properties["number_of_config_items"] == 0
- assert actual_properties["graph_adapter_used"] == "deprecated -- see
lifecycle_adapters_used"
- assert actual_properties["result_builder_used"] ==
"hamilton.base.PandasDataFrameResult"
- assert actual_properties["lifecycle_adapters_used"] ==
["hamilton.base.PandasDataFrameResult"]
-
-
[email protected]("hamilton.telemetry.send_event_json")
[email protected](
- "driver_factory",
- [
- (lambda: Driver({}, tests.resources.very_simple_dag)),
- (
- lambda: (
- Builder()
- .enable_dynamic_execution(allow_experimental_mode=True)
- .with_modules(tests.resources.very_simple_dag)
-
.with_adapter(base.SimplePythonGraphAdapter(base.PandasDataFrameResult()))
- .with_remote_executor(executors.SynchronousLocalTaskExecutor())
- .build()
- )
- ),
- ],
-)
-def test_capture_execute_telemetry_disabled(send_event_json, driver_factory):
- """Tests that we don't do anything if telemetry is disabled."""
- dr = driver_factory()
- results = dr.execute(["b"], inputs={"a": 1})
- expected = pd.DataFrame([{"b": 1}])
- pd.testing.assert_frame_equal(results, expected)
- assert send_event_json.called is False
-
-
[email protected]("hamilton.telemetry.send_event_json")
[email protected]("hamilton.telemetry.g_telemetry_enabled", True)
[email protected](
- "driver_factory",
- [
- (lambda: Driver({}, tests.resources.very_simple_dag)),
- (
- lambda: (
- Builder()
- .enable_dynamic_execution(allow_experimental_mode=True)
- .with_modules(tests.resources.very_simple_dag)
-
.with_adapter(base.SimplePythonGraphAdapter(base.PandasDataFrameResult()))
- .with_remote_executor(executors.SynchronousLocalTaskExecutor())
- .build()
- )
- ),
- ],
-)
-def test_capture_execute_telemetry_error(send_event_json, driver_factory):
- """Tests that we don't error if an exception occurs"""
- send_event_json.side_effect = [None, ValueError("FAKE ERROR"), None]
- dr = driver_factory()
- results = dr.execute(["b"], inputs={"a": 1})
- expected = pd.DataFrame([{"b": 1}])
- pd.testing.assert_frame_equal(results, expected)
- assert send_event_json.called is True
- assert len(send_event_json.call_args_list) == 2
-
-
[email protected]("hamilton.telemetry.send_event_json")
[email protected]("hamilton.telemetry.g_telemetry_enabled", True)
[email protected](
- "driver_factory",
- [
- (lambda: Driver({}, tests.resources.very_simple_dag)),
- (
- lambda: (
- Builder()
- .enable_dynamic_execution(allow_experimental_mode=True)
- .with_modules(tests.resources.very_simple_dag)
-
.with_adapter(base.SimplePythonGraphAdapter(base.PandasDataFrameResult()))
- .with_remote_executor(executors.SynchronousLocalTaskExecutor())
- .build()
- )
- ),
- ],
-)
-def test_capture_execute_telemetry(send_event_json, driver_factory):
- """Happy path with values passed."""
- dr = driver_factory()
- results = dr.execute(["b"], inputs={"a": 1}, overrides={"b": 2})
- expected = pd.DataFrame([{"b": 2}])
- pd.testing.assert_frame_equal(results, expected)
- assert send_event_json.called is True
- assert len(send_event_json.call_args_list) == 2
-
-
[email protected]("hamilton.telemetry.send_event_json")
[email protected]("hamilton.telemetry.g_telemetry_enabled", True)
[email protected](
- "driver_factory",
- [
- (lambda: Driver({"a": 1}, tests.resources.very_simple_dag)),
- (
- lambda: (
- Builder()
- .enable_dynamic_execution(allow_experimental_mode=True)
- .with_modules(tests.resources.very_simple_dag)
-
.with_adapter(base.SimplePythonGraphAdapter(base.PandasDataFrameResult()))
- .with_remote_executor(executors.SynchronousLocalTaskExecutor())
- .with_config({"a": 1})
- .build()
- )
- ),
- ],
-)
-def test_capture_execute_telemetry_none_values(send_event_json,
driver_factory):
- """Happy path with none values."""
- dr = driver_factory()
- results = dr.execute(["b"])
- expected = pd.DataFrame([{"b": 1}])
- pd.testing.assert_frame_equal(results, expected)
- assert len(send_event_json.call_args_list) == 2
-
-
@pytest.mark.parametrize(
"driver_factory",
[
diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py
deleted file mode 100644
index 52ba2878..00000000
--- a/tests/test_telemetry.py
+++ /dev/null
@@ -1,237 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import configparser
-import os
-import sys
-import uuid
-from typing import Any
-from unittest import mock
-
-import pytest
-
-from hamilton import async_driver, base, node, telemetry
-from hamilton.lifecycle import base as lifecycle_base
-
-telemetry.MAX_COUNT_SESSION = 100
-
-
[email protected]
-def blank_conf_file(tmp_path_factory):
- """Fixture to load config file without an ID"""
- file_location = tmp_path_factory.mktemp("home") / "hamilton.conf"
- with open(file_location, "w") as conf_file:
- conf = configparser.ConfigParser()
- conf.write(conf_file)
- return file_location
-
-
[email protected]
-def existing_conf_file(tmp_path_factory):
- """Fixture to load config file with an ID"""
- file_location = tmp_path_factory.mktemp("home") / "hamilton.conf"
- with open(file_location, "w") as conf_file:
- conf = configparser.ConfigParser()
- conf["DEFAULT"]["anonymous_id"] = "testing123-id"
- conf.write(conf_file)
- return file_location
-
-
-def test__load_config_exists_with_id(existing_conf_file):
- """Tests loading a config that has an ID."""
- config = telemetry._load_config(existing_conf_file)
- a_id = config["DEFAULT"]["anonymous_id"]
- assert a_id == "testing123-id"
-
-
-def test__load_config_exists_without_id(blank_conf_file):
- """Tests load from existing file without an ID."""
- config = telemetry._load_config(blank_conf_file)
- a_id = config["DEFAULT"]["anonymous_id"]
- assert str(uuid.UUID(a_id, version=4)) == a_id
- # check it was written back
- with open(blank_conf_file, "r") as conf_file:
- actual_config = configparser.ConfigParser()
- actual_config.read_file(conf_file)
- assert a_id == actual_config["DEFAULT"]["anonymous_id"]
-
-
-def test__load_config_new(tmp_path_factory):
- """Tests no config file existing and one being created."""
- file_location = tmp_path_factory.mktemp("home") / "hamilton123.conf"
- config = telemetry._load_config(file_location)
- a_id = config["DEFAULT"]["anonymous_id"]
- assert str(uuid.UUID(a_id, version=4)) == a_id
- # check it was written back
- with open(file_location, "r") as conf_file:
- actual_config = configparser.ConfigParser()
- actual_config.read_file(conf_file)
- assert a_id == actual_config["DEFAULT"]["anonymous_id"]
-
-
[email protected](os.environ, {"HAMILTON_TELEMETRY_ENABLED": ""})
-def test__check_config_and_environ_for_telemetry_flag_not_present():
- """Tests not present in both."""
- conf = configparser.ConfigParser()
- actual = telemetry._check_config_and_environ_for_telemetry_flag(False,
conf)
- assert actual is False
-
-
[email protected](os.environ, {"HAMILTON_TELEMETRY_ENABLED": ""})
-def test__check_config_and_environ_for_telemetry_flag_in_config():
- """tests getting from config."""
- conf = configparser.ConfigParser()
- conf["DEFAULT"]["telemetry_enabled"] = "tRuE"
- actual = telemetry._check_config_and_environ_for_telemetry_flag(False,
conf)
- assert actual is True
-
-
[email protected](os.environ, {"HAMILTON_TELEMETRY_ENABLED": "TrUe"})
-def test__check_config_and_environ_for_telemetry_flag_in_env():
- """tests getting from env."""
- conf = configparser.ConfigParser()
- actual = telemetry._check_config_and_environ_for_telemetry_flag(False,
conf)
- assert actual is True
-
-
[email protected](os.environ, {"HAMILTON_TELEMETRY_ENABLED": "TrUe"})
-def test__check_config_and_environ_for_telemetry_flag_env_overrides():
- """tests that env overrides the config."""
- conf = configparser.ConfigParser()
- conf["DEFAULT"]["telemetry_enabled"] = "FALSE"
- actual = telemetry._check_config_and_environ_for_telemetry_flag(False,
conf)
- assert actual is True
-
-
[email protected](
- os.environ.get("CI") != "true",
- reason="This test is currently flaky when run locally -- "
- "it has to be run exactly as it is in CI. "
- "As it is not a high-touch portion of the codebase, "
- "we default it not to run locally.",
-)
-def test_sanitize_error_general():
- """Tests sanitizing code in the general case.
-
- Run this test how circleci runs it.
-
- It's too hard to test code that isn't in the repo, or at least it hasn't
occurred to
- me how to mock it easily.
- """
- try:
- # make a call in a hamilton module to mimic something from hamilton
- # but the stack trace should block the stack call from this function.
- telemetry.get_adapter_name(None)
- except AttributeError:
- actual = telemetry.sanitize_error(*sys.exc_info())
- # this strips the full path -- note: line changes in telemetry.py will
change this...
- # so replace with line XXX
- import re
-
- actual = re.sub(r"line \d\d\d", "line XXX", actual)
- expected = """...hamilton/hamilton/tests/test_telemetry.py, line XXX,
in test_sanitize_error_general\n...hamilton/hamilton/hamilton/telemetry.py,
line XXX, in get_adapter_name\n"""
-
- # if this fails -- run it how github actions run it
- assert actual == expected
-
-
-# classes for the tests below
-class CustomAdapter(base.HamiltonGraphAdapter):
- @staticmethod
- def check_input_type(node_type: type, input_value: Any) -> bool:
- pass
-
- @staticmethod
- def check_node_type_equivalence(node_type: type, input_type: type) -> bool:
- pass
-
- def execute_node(self, node: node.Node, kwargs: dict[str, Any]) -> Any:
- pass
-
- def __init__(self, result_builder: base.ResultMixin):
- self.result_builder = result_builder
-
-
-class CustomResultBuilder(base.ResultMixin):
- pass
-
-
[email protected](
- ("adapter", "expected"),
- [
- (
- base.SimplePythonDataFrameGraphAdapter(),
- "hamilton.base.SimplePythonDataFrameGraphAdapter",
- ),
- (
- base.DefaultAdapter(),
- "hamilton.base.DefaultAdapter",
- ),
- (
- async_driver.AsyncGraphAdapter(base.DictResult()),
- "hamilton.async_driver.AsyncGraphAdapter",
- ),
- (CustomAdapter(base.DictResult()), "custom_adapter"),
- ],
-)
-def test_get_adapter_name(adapter, expected):
- """Tests get_adapter_name"""
- actual = telemetry.get_adapter_name(adapter)
- assert actual == expected
-
-
[email protected](
- ("adapter", "expected"),
- [
- (base.SimplePythonDataFrameGraphAdapter(),
"hamilton.base.PandasDataFrameResult"),
- (base.DefaultAdapter(), "hamilton.base.DictResult"),
- (
- base.SimplePythonGraphAdapter(base.NumpyMatrixResult()),
- "hamilton.base.NumpyMatrixResult",
- ),
- (
-
base.SimplePythonGraphAdapter(base.StrictIndexTypePandasDataFrameResult()),
- "hamilton.base.StrictIndexTypePandasDataFrameResult",
- ),
- (base.SimplePythonGraphAdapter(CustomResultBuilder()),
"custom_builder"),
- (async_driver.AsyncGraphAdapter(base.DictResult()),
"hamilton.base.DictResult"),
- (CustomAdapter(base.DictResult()), "hamilton.base.DictResult"),
- (CustomAdapter(CustomResultBuilder()), "custom_builder"),
- ],
-)
-def test_get_result_builder_name(adapter, expected):
- """Tests getting the result builder name. This is largely backwards
compatibility
- but still provides nice information as to the provided tooling the user
leverages."""
- actual =
telemetry.get_result_builder_name(lifecycle_base.LifecycleAdapterSet(adapter))
- assert actual == expected
-
-
-def test_is_telemetry_enabled_false():
- """Tests that we don't increment the counter when we're disabled."""
- before = telemetry.call_counter
- telemetry_enabled = telemetry.is_telemetry_enabled()
- assert telemetry.call_counter == before
- assert telemetry_enabled is False
-
-
[email protected]("hamilton.telemetry.g_telemetry_enabled", True)
-def test_is_telemetry_disabled_true():
- """Tests that we do increment the counter when we're enabled."""
- before = telemetry.call_counter
- telemetry_enabled = telemetry.is_telemetry_enabled()
- assert telemetry.call_counter == before + 1
- assert telemetry_enabled is True
diff --git a/ui/backend/server/trackingserver_base/apps.py
b/ui/backend/server/trackingserver_base/apps.py
index d6c2ccd1..3a04552b 100644
--- a/ui/backend/server/trackingserver_base/apps.py
+++ b/ui/backend/server/trackingserver_base/apps.py
@@ -15,32 +15,10 @@
# specific language governing permissions and limitations
# under the License.
-import os.path
-import uuid
-
from django.apps import AppConfig
from django.conf import settings
from django.db import models
-from hamilton.telemetry import API_KEY, BASE_PROPERTIES, is_telemetry_enabled,
send_event_json
-
-
-def create_server_event_json(telemetry_key: str) -> dict:
- """Function to create payload for tracking server event.
-
- :param event_name: the name of the server event
- :return: dict representing the JSON to send.
- """
- old_anonymous_id = BASE_PROPERTIES["distinct_id"]
- event = {
- "event": "os_hamilton_ui_server_start",
- "api_key": API_KEY,
- "properties": {"telemetry_key": telemetry_key, "old_anonymous_id":
old_anonymous_id},
- }
- event["properties"].update(BASE_PROPERTIES)
- event["properties"]["distinct_id"] = telemetry_key
- return event
-
def set_max_length_for_charfield(model_class, field_name, max_length=1024):
field = model_class._meta.get_field(field_name)
@@ -51,18 +29,6 @@ class TrackingServerConfig(AppConfig):
default_auto_field = "django.db.models.BigAutoField"
name = "trackingserver_base"
- def enable_telemetry(self):
- if is_telemetry_enabled() and settings.HAMILTON_ENV == "local":
- telemetry_file = "/tmp/hamilton-telemetry.txt"
- if not os.path.exists(telemetry_file):
- telemetry_key = str(uuid.uuid4())
- with open(telemetry_file, "w") as f:
- f.write(telemetry_key)
- else:
- with open(telemetry_file, "r") as f:
- telemetry_key = f.read().strip()
- send_event_json(create_server_event_json(telemetry_key))
-
def sqllite_compatibility(self):
if settings.DATABASES["default"]["ENGINE"] ==
"django.db.backends.sqlite3":
from django.apps import apps
@@ -73,5 +39,4 @@ class TrackingServerConfig(AppConfig):
set_max_length_for_charfield(model, field.name)
def ready(self):
- self.enable_telemetry()
self.sqllite_compatibility()
diff --git a/ui/sdk/tests/conftest.py b/ui/sdk/tests/conftest.py
deleted file mode 100644
index ce510679..00000000
--- a/ui/sdk/tests/conftest.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-from hamilton import telemetry
-
-telemetry.disable_telemetry()