This is an automated email from the ASF dual-hosted git repository.
jernejfrank pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hamilton.git
The following commit(s) were added to refs/heads/main by this push:
new 3c32aca5 Migrate to GitHub actions (#1342)
3c32aca5 is described below
commit 3c32aca5cdf4f94d7c52c6c7271b2a0bc27ee3dc
Author: jf <[email protected]>
AuthorDate: Fri Jun 20 08:47:32 2025 +0100
Migrate to GitHub actions (#1342)
Solve #1313, also use uv.
- disable fail-fast so that all failing tests in the matrix are visible
- support only Linux for now
- add option to run the tests manually through the actions tab
- run tests (mostly plugins) on all supported Python versions of the plugin
package:
- Ray, Dask, PySpark 3.9-3.12
- pre-commit 3.8-3.12
- pandas, polars, narwhals 3.8-3.12
- Vaex 3.8-3.10
Squashed commits:
* Migrate CircleCI to actions
Limit to linux os for now
* Fix linting issues
* Fix tests and minor bug issues
* Fix SDK
Fix Polars hist lower bound tests
Fix test by changing sql.DataFrame to sql.classic.DataFrame
Fix sanitize error to work on CI
* Fix pandas/polars plugin tests
* Pin dask minimal dependency that resolves a bug
* Fix ray venv bug
* Add grpcio dependency for spark
* Try force-reinstalling grpcio stuff
* Disable pyspark ANSI
* Try to fix SparkInputValidator test
* Add reason for fail-fast false
---
.ci/setup.sh | 49 -------
.ci/test.sh | 80 -----------
.circleci/config.yml | 181 -------------------------
.github/workflows/hamilton-main.yml | 163 ++++++++++++++++++++++
examples/validate_examples.py | 2 +
plugin_tests/h_pandas/test_with_columns.py | 6 +-
plugin_tests/h_polars/test_with_columns.py | 6 +-
plugin_tests/h_spark/test_h_spark.py | 16 +--
pyproject.toml | 22 +--
scripts/test_memory.py | 4 +-
tests/test_base.py | 7 +-
tests/test_telemetry.py | 7 +-
ui/sdk/src/hamilton_sdk/adapters.py | 12 +-
ui/sdk/tests/tracking/test_polars_col_stats.py | 2 +-
ui/sdk/tests/tracking/test_polars_stats.py | 4 +-
ui/sdk/tests/tracking/test_pyspark_stats.py | 2 +-
writeups/garbage_collection/memory_test.py | 2 +-
17 files changed, 207 insertions(+), 358 deletions(-)
diff --git a/.ci/setup.sh b/.ci/setup.sh
deleted file mode 100755
index 3feca833..00000000
--- a/.ci/setup.sh
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/bin/bash
-
-set -e -u -o pipefail
-
-OPERATING_SYSTEM=$(uname -s)
-
-if [[ ${OPERATING_SYSTEM} == "Linux" ]]; then
- sudo apt-get update -y
- sudo apt-get install \
- --no-install-recommends \
- --yes \
- graphviz
-fi
-
-# setting up a virtualenv isn't necessary for the "pre-commit" task
-if [[ ${TASK} != "pre-commit" ]]; then
- mkdir -p "${HOME}/venvs/hamilton-venv"
- python -m venv "${HOME}/venvs/hamilton-venv" # TODO: add --upgrade-deps
after dropping support for py3.8
- source "${HOME}/venvs/hamilton-venv/bin/activate"
- pip install ".[test]"
-fi
-
-if [[ ${TASK} == "pyspark" ]]; then
- if [[ ${OPERATING_SYSTEM} == "Linux" ]]; then
- sudo apt-get install \
- --no-install-recommends \
- --yes \
- default-jre
- fi
-fi
-
-if [[ ${TASK} == "vaex" ]]; then
- if [[ ${OPERATING_SYSTEM} == "Linux" ]]; then
- sudo apt-get install \
- --no-install-recommends \
- --yes \
- libpcre3-dev cargo
- fi
-fi
-
-echo "----- python version -----"
-python --version
-
-echo "----- pip version -----"
-pip --version
-echo "-----------------------"
-
-# disable telemetry!
-export HAMILTON_TELEMETRY_ENABLED=false
diff --git a/.ci/test.sh b/.ci/test.sh
deleted file mode 100755
index 1d2542ec..00000000
--- a/.ci/test.sh
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/bin/bash
-
-set -e -u -o pipefail
-
-echo "running CI task '${TASK}'"
-
-if [[ ${TASK} == "pre-commit" ]]; then
- pip install pre-commit
- pre-commit run --all-files
- exit 0
-fi
-
-echo "using venv at '${HOME}/venvs/hamilton-venv/bin/activate'"
-source "${HOME}/venvs/hamilton-venv/bin/activate"
-
-if [[ ${TASK} == "async" ]]; then
- pip install .
- pytest plugin_tests/h_async
- exit 0
-fi
-
-if [[ ${TASK} == "dask" ]]; then
- pip install -e '.[dask]'
- pytest plugin_tests/h_dask
- exit 0
-fi
-
-if [[ ${TASK} == "integrations" ]]; then
- pip install -e '.[pandera, test]'
- pip install -r tests/integrations/pandera/requirements.txt
- if python -c 'import sys; exit(0) if sys.version_info[:2] == (3, 9) else
exit(1)'; then
- echo "Python version is 3.9"
- pip install dask-expr
- else
- echo "Python version is not 3.9"
- fi
- pytest tests/integrations
- exit 0
-fi
-
-if [[ ${TASK} == "ray" ]]; then
- pip install -e '.[ray]'
- pytest plugin_tests/h_ray
- exit 0
-fi
-
-if [[ ${TASK} == "pyspark" ]]; then
- pip install -e '.[pyspark]'
- pip install 'numpy<2' 'pyspark[connect]' # downgrade until spark fixes
their bug
- pytest plugin_tests/h_spark
- exit 0
-fi
-
-if [[ ${TASK} == "vaex" ]]; then
- pip install "numpy<2.0.0" # numpy2.0 breaks vaex
- pip install -e '.[vaex]'
- pytest plugin_tests/h_vaex
- exit 0
-fi
-
-if [[ ${TASK} == "narwhals" ]]; then
- pip install -e .
- pip install polars pandas narwhals
- pytest plugin_tests/h_narwhals
- exit 0
-fi
-
-if [[ ${TASK} == "tests" ]]; then
- pip install .
- # https://github.com/plotly/Kaleido/issues/226
- pip install "kaleido<0.4.0" # kaleido 0.4.0 breaks plotly; TODO: remove
this
- pytest \
- --cov=hamilton \
- --ignore tests/integrations \
- tests/
- exit 0
-fi
-
-echo "ERROR: did not recognize TASK '${TASK}'"
-exit 1
diff --git a/.circleci/config.yml b/.circleci/config.yml
deleted file mode 100644
index 6c101833..00000000
--- a/.circleci/config.yml
+++ /dev/null
@@ -1,181 +0,0 @@
-version: 2.1
-jobs:
- check_for_changes:
- docker:
- - image: circleci/python:3.10
- steps:
- - checkout
- - run:
- name: Check for changes in specific paths
- command: |
- set +e
- git diff --name-only origin/main...HEAD | grep
'^.ci\|^.circleci\|^graph_adapter_tests\|^hamilton\|^plugin_tests\|^tests\|^requirements\|setup'
> /dev/null
- if [ $? -eq 0 ]; then
- echo "Changes found in target paths."
- echo 'true' > /tmp/changes_detected
- else
- echo "No changes found in target paths."
- echo 'false' > /tmp/changes_detected
- fi
- - persist_to_workspace:
- root: /tmp
- paths:
- - changes_detected
- test:
- parameters:
- python-version:
- type: string
- task:
- type: string
- docker:
- - image: cimg/python:<< parameters.python-version >>
- environment:
- TASK: << parameters.task >>
- CI: true
- steps:
- - checkout
- - attach_workspace:
- at: /tmp
- - run:
- name: Check if changes were detected
- command: |
- if grep -q 'false' /tmp/changes_detected; then
- echo "No changes detected, skipping job..."
- circleci-agent step halt
- fi
- - run:
- name: install dependencies
- command: .ci/setup.sh
- - run:
- name: run tests
- command: .ci/test.sh
-workflows:
- unit-test-workflow:
- jobs:
- - check_for_changes
- - test:
- requires:
- - check_for_changes
- name: build-py38
- python-version: '3.8'
- task: tests
- - test:
- requires:
- - check_for_changes
- name: build-py39
- python-version: '3.9'
- task: tests
- - test:
- requires:
- - check_for_changes
- name: build-py310
- python-version: '3.10'
- task: tests
- - test:
- name: build-py311
- python-version: '3.11'
- task: tests
- - test:
- name: build-py312
- python-version: '3.12'
- task: tests
- - test:
- name: pre-commit
- python-version: '3.11'
- task: pre-commit
- - test:
- requires:
- - check_for_changes
- name: dask-py39
- python-version: '3.9'
- task: dask
- - test:
- requires:
- - check_for_changes
- name: dask-py311
- python-version: '3.11'
- task: dask
- - test:
- requires:
- - check_for_changes
- name: ray-py11
- python-version: '3.11'
- task: ray
- - test:
- requires:
- - check_for_changes
- name: vaex-py310
- python-version: '3.10'
- task: vaex
- - test:
- requires:
- - check_for_changes
- name: spark-py39
- python-version: '3.9'
- task: pyspark
- - test:
- requires:
- - check_for_changes
- name: spark-py310
- python-version: '3.10'
- task: pyspark
- - test:
- requires:
- - check_for_changes
- name: spark-py311
- python-version: '3.11'
- task: pyspark
- - test:
- requires:
- - check_for_changes
- name: spark-py312
- python-version: '3.12'
- task: pyspark
- - test:
- requires:
- - check_for_changes
- name: integrations-py38
- python-version: '3.8'
- task: integrations
- - test:
- requires:
- - check_for_changes
- name: integrations-py39
- python-version: '3.9'
- task: integrations
- - test:
- requires:
- - check_for_changes
- name: integrations-py310
- python-version: '3.10'
- task: integrations
- - test:
- requires:
- - check_for_changes
- name: integrations-py311
- python-version: '3.11'
- task: integrations
- - test:
- requires:
- - check_for_changes
- name: integrations-py312
- python-version: '3.12'
- task: integrations
- - test:
- requires:
- - check_for_changes
- name: narwhals-py39
- python-version: '3.9'
- task: narwhals
- - test:
- requires:
- - check_for_changes
- name: narwhals-py310
- python-version: '3.10'
- task: narwhals
- - test:
- requires:
- - check_for_changes
- name: narwhals-py311
- python-version: '3.11'
- task: narwhals
diff --git a/.github/workflows/hamilton-main.yml
b/.github/workflows/hamilton-main.yml
new file mode 100644
index 00000000..d7ef1dae
--- /dev/null
+++ b/.github/workflows/hamilton-main.yml
@@ -0,0 +1,163 @@
+name: Unit Tests
+
+on:
+ workflow_dispatch:
+
+ pull_request:
+ branches:
+ - main
+ paths:
+ - '.github/**'
+ - 'hamilton/**'
+ - 'plugin_tests/**'
+ - 'tests/**'
+ - 'pyproject.toml'
+
+jobs:
+ test:
+ name: "Unit Tests"
+ runs-on: ${{ matrix.os }}
+ strategy:
+ fail-fast: false # want to see for each version if fails are different
+ matrix:
+ os:
+ - ubuntu-latest
+ python-version:
+ - '3.8'
+ - '3.9'
+ - '3.10'
+ - '3.11'
+ - '3.12'
+ env:
+ UV_PRERELEASE: "allow"
+ HAMILTON_TELEMETRY_ENABLED: false
+
+ steps:
+ - name: Install Graphviz on Linux
+ if: runner.os == 'Linux'
+ run: sudo apt-get update && sudo apt-get install --yes
--no-install-recommends graphviz
+
+ - name: Install Graphviz on Windows
+ if: runner.os == 'Windows'
+ run: choco install graphviz
+ shell: powershell
+
+ - name: Install Graphviz on macOS
+ if: runner.os == 'macOS'
+ run: |
+ brew install graphviz
+ brew install libomp
+
+ - name: Checkout repository
+ uses: actions/checkout@v4
+
+ - name: Install uv and set the python version
+ uses: astral-sh/setup-uv@v6
+ with:
+ python-version: ${{ matrix.python-version }}
+ enable-cache: true
+ cache-dependency-glob: "uv.lock"
+ activate-environment: true
+
+ # It's enough to do it on single OS
+ - name: Check linting with pre-commit
+ if: ${{ runner.os == 'Linux' }}
+ run: |
+ uv sync --extra dev
+ uv run pre-commit install
+ uv run pre-commit run --all-files
+
+ - name: Test hamilton main package
+ run: |
+ uv sync --extra test
+ uv pip install "kaleido<0.4.0"
+ uv run pytest tests/ --cov=hamilton --ignore tests/integrations
+
+
+ - name: Test integrations
+ if: ${{ matrix.python-version == '3.9' }}
+ run: |
+ uv sync --extra test --extra pandera
+ uv pip install -r tests/integrations/pandera/requirements.txt
+ uv pip install dask-expr
+ uv run pytest tests/integrations
+
+ - name: Test integrations
+ if: ${{ matrix.python-version != '3.9' }}
+ run: |
+ uv sync --extra test --extra pandera
+ uv pip install -r tests/integrations/pandera/requirements.txt
+ uv run pytest tests/integrations
+
+ - name: Test pandas
+ run: |
+ uv sync --extra test
+ uv run pytest plugin_tests/h_pandas
+
+ - name: Test polars
+ run: |
+ uv sync --extra test
+ uv pip install polars
+ uv run pytest plugin_tests/h_polars
+
+ - name: Test narwhals
+ run: |
+ uv sync --extra test
+ uv pip install polars pandas narwhals
+ uv run pytest plugin_tests/h_narwhals
+
+ - name: Test dask
+ # Dask supports >= py3.9
+ if: ${{ matrix.python-version != '3.8' }}
+ run: |
+ uv sync --extra test --extra dask
+ uv run pytest plugin_tests/h_dask
+
+ - name: Test ray
+ # Ray supports >= py3.9
+ if: ${{ matrix.python-version != '3.8' }}
+ env:
+ RAY_ENABLE_UV_RUN_RUNTIME_ENV: 0 #
https://github.com/ray-project/ray/issues/53848
+ run: |
+ uv sync --extra test --extra ray
+ uv run pytest plugin_tests/h_ray
+
+ - name: Test pyspark
+ # Spark supports >= py3.9
+ if: ${{ matrix.python-version != '3.8' && runner.os == 'Linux' }}
+ env:
+ PYSPARK_SUBMIT_ARGS: "--conf spark.sql.ansi.enabled=false
pyspark-shell"
+ run: |
+ sudo apt-get install --no-install-recommends --yes default-jre
+ uv sync --extra test --extra pyspark
+ uv pip install 'numpy<2' 'pyspark[connect]' 'grpcio'
+ uv pip install --no-cache --reinstall --strict 'grpcio-status >=
1.48.1'
+ uv run pytest plugin_tests/h_spark
+
+ - name: Test pyspark
+ # Spark supports >= py3.9
+ if: ${{ matrix.python-version != '3.8' && runner.os != 'Linux' }}
+ env:
+ PYSPARK_SUBMIT_ARGS: "--conf spark.sql.ansi.enabled=false
pyspark-shell"
+ run: |
+ uv sync --extra test --extra pyspark
+ uv pip install 'numpy<2' 'pyspark[connect]' 'grpcio'
+ uv pip install --no-cache --reinstall --strict 'grpcio-status >=
1.48.1'
+ uv run pytest plugin_tests/h_spark
+
+ - name: Test vaex
+ # Vaex supports <= py3.10 and numpy<2
+ if: ${{ runner.os == 'Linux' && (matrix.python-version == '3.8' ||
matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
+ run: |
+ sudo apt-get install --no-install-recommends --yes libpcre3-dev
cargo
+ uv sync --extra test --extra vaex
+ uv pip install "numpy<2"
+ uv run pytest plugin_tests/h_vaex
+
+ - name: Test vaex
+ # Vaex supports <= py3.10 and numpy<2
+ if: ${{ runner.os != 'Linux' && (matrix.python-version == '3.8' ||
matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
+ run: |
+ uv sync --extra test --extra vaex
+ uv pip install "numpy<2"
+ uv run pytest plugin_tests/h_vaex
diff --git a/examples/validate_examples.py b/examples/validate_examples.py
index d85dcbbf..fb361158 100644
--- a/examples/validate_examples.py
+++ b/examples/validate_examples.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
import argparse
import logging
import pathlib
diff --git a/plugin_tests/h_pandas/test_with_columns.py
b/plugin_tests/h_pandas/test_with_columns.py
index f9012e71..85076c4f 100644
--- a/plugin_tests/h_pandas/test_with_columns.py
+++ b/plugin_tests/h_pandas/test_with_columns.py
@@ -246,7 +246,7 @@ def test_append_into_original_df():
upstream_df=dummy_df(),
dummy_fn_with_columns=dummy_fn_with_columns(col_1=pd.Series([1, 2, 3,
4])),
)
- assert merge_node.name == "__append"
+ assert merge_node.name == "_append"
assert merge_node.type == pd.DataFrame
pd.testing.assert_series_equal(output_df["col_1"], pd.Series([1, 2, 3,
4]), check_names=False)
@@ -275,7 +275,7 @@ def test_override_original_column_in_df():
merge_node = output_nodes[-1]
output_df = merge_node.callable(upstream_df=dummy_df(), col_1=col_1())
- assert merge_node.name == "__append"
+ assert merge_node.name == "_append"
assert merge_node.type == pd.DataFrame
pd.testing.assert_series_equal(output_df["col_1"], pd.Series([0, 3, 5,
7]), check_names=False)
@@ -303,7 +303,7 @@ def test_assign_custom_namespace_with_columns():
assert nodes_[0].name == "target_fn"
assert nodes_[1].name == "dummy_namespace.dummy_fn_with_columns"
assert nodes_[2].name == "dummy_namespace.col_1"
- assert nodes_[3].name == "dummy_namespace.__append"
+ assert nodes_[3].name == "dummy_namespace._append"
def test_end_to_end_with_columns_automatic_extract():
diff --git a/plugin_tests/h_polars/test_with_columns.py
b/plugin_tests/h_polars/test_with_columns.py
index 151347fb..892fb4ce 100644
--- a/plugin_tests/h_polars/test_with_columns.py
+++ b/plugin_tests/h_polars/test_with_columns.py
@@ -144,7 +144,7 @@ def test_append_into_original_df():
upstream_df=dummy_df(),
dummy_fn_with_columns=dummy_fn_with_columns(col_1=pl.Series([1, 2, 3,
4])),
)
- assert merge_node.name == "__append"
+ assert merge_node.name == "_append"
assert merge_node.type == pl.DataFrame
pl.testing.assert_series_equal(output_df["col_1"], pl.Series([1, 2, 3,
4]), check_names=False)
@@ -174,7 +174,7 @@ def test_override_original_column_in_df():
merge_node = output_nodes[-1]
output_df = merge_node.callable(upstream_df=dummy_df(), col_1=col_1())
- assert merge_node.name == "__append"
+ assert merge_node.name == "_append"
assert merge_node.type == pl.DataFrame
pl.testing.assert_series_equal(
@@ -204,7 +204,7 @@ def test_assign_custom_namespace_with_columns():
assert nodes_[0].name == "target_fn"
assert nodes_[1].name == "dummy_namespace.dummy_fn_with_columns"
assert nodes_[2].name == "dummy_namespace.col_1"
- assert nodes_[3].name == "dummy_namespace.__append"
+ assert nodes_[3].name == "dummy_namespace._append"
def test_end_to_end_with_columns_automatic_extract():
diff --git a/plugin_tests/h_spark/test_h_spark.py
b/plugin_tests/h_spark/test_h_spark.py
index 36bc295a..2532c35c 100644
--- a/plugin_tests/h_spark/test_h_spark.py
+++ b/plugin_tests/h_spark/test_h_spark.py
@@ -6,7 +6,6 @@ import pyspark.pandas as ps
import pytest
from pyspark import Row
from pyspark.sql import Column, DataFrame, SparkSession, types
-from pyspark.sql.connect.dataframe import DataFrame as CDataFrame
from pyspark.sql.connect.session import SparkSession as CSparkSession
from pyspark.sql.functions import column
@@ -889,19 +888,10 @@ def test_create_selector_node(spark_session):
)
-def test_spark_input_adapter_dataframe():
+def test_spark_input_adapter_dataframe(spark_session):
# We have to do these at is is very difficult to mock out connect.x objects
-
- class ConnectDataFrame(CDataFrame):
- def __init__(self):
- pass
-
- def __repr__(self):
- return "df"
-
- assert SparkInputValidator().do_validate_input(
- node_type=DataFrame, input_value=ConnectDataFrame()
- )
+ df = spark_session.range(1)
+ assert SparkInputValidator().do_validate_input(node_type=DataFrame,
input_value=df)
def test_spark_input_adapter_connector():
diff --git a/pyproject.toml b/pyproject.toml
index 68130ffd..16480381 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,6 @@ dependencies = [
cli = ["typer"]
dask = ["dask[complete]"] # commonly you'll want everything.
dask-array = ["dask[array]"]
-dask-core = ["dask-core"]
dask-dataframe = ["dask[dataframe]"]
dask-diagnostics = ["dask[diagnostics]"]
dask-distributed = ["dask[distributed]"]
@@ -51,14 +50,13 @@ docs = [
"sf-hamilton[dev]",
"alabaster>=0.7,<0.8,!=0.7.5", # read the docs pins
"commonmark==0.9.1", # read the docs pins
- "dask-expr; python_version == '3.9'",
+ "dask-expr>=1.1.14; python_version >= '3.10'", # Bugfix only available after
py3.10 https://github.com/dask/dask-expr/pull/1150
"dask[distributed]",
"ddtrace<3.0",
"diskcache",
# required for all the plugins
"dlt",
- # furo -- install from main for now until the next release is out:
- "furo @ git+https://github.com/pradyunsg/furo@main",
+ "furo",
"gitpython", # Required for parsing git info for generation of data-adapter
docs
"grpcio-status",
"lightgbm",
@@ -69,6 +67,7 @@ docs = [
"myst-nb",
"narwhals",
"numpy < 2.0.0",
+ "packaging",
"pandera",
"pillow",
"polars",
@@ -104,17 +103,18 @@ pandera = ["pandera"]
pydantic = ["pydantic>=2.0"]
pyspark = [
# we have to run these dependencies because Spark does not check to ensure
the right target was called
- "pyspark[pandas_on_spark,sql]"
+ "pyspark[pandas_on_spark,sql]",
]
ray = ["ray>=2.0.0", "pyarrow"]
rich = ["rich"]
sdk = ["sf-hamilton-sdk"]
slack = ["slack-sdk"]
test = [
- "connectorx",
+ "connectorx<=0.3.2; python_version=='3.8'",
+ "connectorx; python_version!='3.8'",
"dask[complete]",
- "dask-expr; python_version == '3.9'",
- "datasets", # huggingface datasets
+ "dask-expr>=1.1.14; python_version >= '3.10'", # Bugfix only available after
py3.10 https://github.com/dask/dask-expr/pull/1150
+ "datasets>=2.18.0", # huggingface datasets --
https://github.com/huggingface/datasets/issues/6737#issuecomment-2107336816
"diskcache",
"dlt",
"fsspec",
@@ -149,7 +149,11 @@ test = [
]
tqdm = ["tqdm"]
ui = ["sf-hamilton-ui"]
-vaex = ["vaex"]
+
+# vaex -- on >=py3.11 only core part available
https://github.com/vaexio/vaex/pull/2331#issuecomment-2437198176
+vaex = [
+ "vaex; python_version <= '3.10'"
+ ]
visualization = ["graphviz", "networkx"]
[project.entry-points.console_scripts]
diff --git a/scripts/test_memory.py b/scripts/test_memory.py
index a3358842..8567fca7 100644
--- a/scripts/test_memory.py
+++ b/scripts/test_memory.py
@@ -49,7 +49,7 @@ count = 0
@parameterize(
- **{f"foo_{i}": {"foo_i_minus_one": source(f"foo_{i-1}")} for i in range(1,
NUM_ITERS)}
+ **{f"foo_{i}": {"foo_i_minus_one": source(f"foo_{i - 1}")} for i in
range(1, NUM_ITERS)}
)
def foo_i(foo_i_minus_one: pd.DataFrame) -> pd.DataFrame:
global count
@@ -61,4 +61,4 @@ def foo_i(foo_i_minus_one: pd.DataFrame) -> pd.DataFrame:
if __name__ == "__main__":
mod = create_temporary_module(foo_i, foo_0)
dr = driver.Builder().with_modules(mod).build()
- output = dr.execute([f"foo_{NUM_ITERS-1}"],
inputs=dict(memory_size=100_000_000))
+ output = dr.execute([f"foo_{NUM_ITERS - 1}"],
inputs=dict(memory_size=100_000_000))
diff --git a/tests/test_base.py b/tests/test_base.py
index 447d506d..e8a44c9a 100644
--- a/tests/test_base.py
+++ b/tests/test_base.py
@@ -5,6 +5,7 @@ import numpy as np
import pandas as pd
import pytest
from numpy import testing
+from packaging import version
from hamilton import base
@@ -279,7 +280,7 @@ def
test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected
# Still supporting old pandas version, although we should phase off...
int_64_index = "Index:::int64" if pd.__version__ >= "2.0.0" else
"RangeIndex:::int64"
-PD_VERSION = tuple(int(item) for item in pd.__version__.split("."))
+PD_VERSION = version.parse(pd.__version__)
@pytest.mark.parametrize(
@@ -326,7 +327,7 @@ PD_VERSION = tuple(int(item) for item in
pd.__version__.split("."))
{"a": pd.Series([1, 2, 3]).index},
({"Index:::int64": ["a"]}, {}, {}),
marks=pytest.mark.skipif(
- PD_VERSION < (2, 0, 0),
+ PD_VERSION < version.parse("2.0.0"),
reason="Pandas 2.0 changed default indices but we still "
"support pandas <2.0",
),
),
@@ -334,7 +335,7 @@ PD_VERSION = tuple(int(item) for item in
pd.__version__.split("."))
{"a": pd.Series([1, 2, 3]).index},
({"Int64Index:::int64": ["a"]}, {}, {}),
marks=pytest.mark.skipif(
- PD_VERSION >= (2, 0, 0),
+ PD_VERSION >= version.parse("2.0.0"),
reason="Pandas 2.0 changed default indices but we still "
"support pandas <2.0",
),
),
diff --git a/tests/test_telemetry.py b/tests/test_telemetry.py
index cc378a56..337cd17f 100644
--- a/tests/test_telemetry.py
+++ b/tests/test_telemetry.py
@@ -126,10 +126,9 @@ def test_sanitize_error_general():
import re
actual = re.sub(r"line \d\d\d", "line XXX", actual)
- expected = (
- """...<USER_CODE>...\n...hamilton/telemetry.py, line XXX, in
get_adapter_name\n"""
- )
- # if this fails -- run it how circleci runs it
+ expected = """...hamilton/hamilton/tests/test_telemetry.py, line XXX,
in test_sanitize_error_general\n...hamilton/hamilton/hamilton/telemetry.py,
line XXX, in get_adapter_name\n"""
+
+ # if this fails -- run it how github actions run it
assert actual == expected
diff --git a/ui/sdk/src/hamilton_sdk/adapters.py
b/ui/sdk/src/hamilton_sdk/adapters.py
index 23cbb3a2..ddb3ee45 100644
--- a/ui/sdk/src/hamilton_sdk/adapters.py
+++ b/ui/sdk/src/hamilton_sdk/adapters.py
@@ -8,17 +8,17 @@ from datetime import timezone
from types import ModuleType
from typing import Any, Callable, Dict, List, Optional, Union
+from hamilton import graph as h_graph
+from hamilton import node
+from hamilton.data_quality import base as dq_base
+from hamilton.lifecycle import base
+
from hamilton_sdk import driver
from hamilton_sdk.api import clients, constants
from hamilton_sdk.tracking import runs
from hamilton_sdk.tracking.runs import Status, TrackingState
from hamilton_sdk.tracking.trackingtypes import TaskRun
-from hamilton import graph as h_graph
-from hamilton import node
-from hamilton.data_quality import base as dq_base
-from hamilton.lifecycle import base
-
logger = logging.getLogger(__name__)
@@ -314,7 +314,7 @@ class HamiltonTracker(
for i, other_result in enumerate(other_results):
other_attr = dict(
node_name=get_node_name(node_, task_id),
- name=other_result.get("name", f"Attribute {i+1}"), # retrieve
name if specified
+ name=other_result.get("name", f"Attribute {i + 1}"), #
retrieve name if specified
type=other_result["observability_type"],
# 0.0.3 -> 3
schema_version=int(other_result["observability_schema_version"].split(".")[-1]),
diff --git a/ui/sdk/tests/tracking/test_polars_col_stats.py
b/ui/sdk/tests/tracking/test_polars_col_stats.py
index a20e7938..3f3a976a 100644
--- a/ui/sdk/tests/tracking/test_polars_col_stats.py
+++ b/ui/sdk/tests/tracking/test_polars_col_stats.py
@@ -66,7 +66,7 @@ def test_quantiles(example_df):
def test_histogram(example_df):
assert pcs.histogram(example_df["a"], num_hist_bins=3) == {
- "(0.996, 2.333333]": 2,
+ "[1.0, 2.333333]": 2,
"(2.333333, 3.666667]": 1,
"(3.666667, 5.0]": 2,
}
diff --git a/ui/sdk/tests/tracking/test_polars_stats.py
b/ui/sdk/tests/tracking/test_polars_stats.py
index 76333c7c..a8a9d43a 100644
--- a/ui/sdk/tests/tracking/test_polars_stats.py
+++ b/ui/sdk/tests/tracking/test_polars_stats.py
@@ -30,7 +30,7 @@ def test_compute_stats_df():
"count": 5,
"data_type": "Int64",
"histogram": {
- "(0.996, 1.4]": 1,
+ "[1.0, 1.4]": 1,
"(1.4, 1.8]": 0,
"(1.8, 2.2]": 1,
"(2.2, 2.6]": 0,
@@ -76,7 +76,7 @@ def test_compute_stats_df():
"count": 5,
"data_type": "Float64",
"histogram": {
- "(0.996, 1.4]": 1,
+ "[1.0, 1.4]": 1,
"(1.4, 1.8]": 0,
"(1.8, 2.2]": 1,
"(2.2, 2.6]": 0,
diff --git a/ui/sdk/tests/tracking/test_pyspark_stats.py
b/ui/sdk/tests/tracking/test_pyspark_stats.py
index 5d239f83..694afc34 100644
--- a/ui/sdk/tests/tracking/test_pyspark_stats.py
+++ b/ui/sdk/tests/tracking/test_pyspark_stats.py
@@ -19,7 +19,7 @@ def test_compute_stats_pyspark():
"observability_schema_version": "0.0.2",
"observability_type": "dict",
"observability_value": {
- "type": "<class 'pyspark.sql.dataframe.DataFrame'>",
+ "type": "<class 'pyspark.sql.classic.dataframe.DataFrame'>",
"value": {
"columns": [
{
diff --git a/writeups/garbage_collection/memory_test.py
b/writeups/garbage_collection/memory_test.py
index 5912fba5..572517bb 100644
--- a/writeups/garbage_collection/memory_test.py
+++ b/writeups/garbage_collection/memory_test.py
@@ -46,7 +46,7 @@ def foo_0(memory_size: int = 100_000_000) -> pd.DataFrame:
@parameterize(
- **{f"foo_{i}": {"foo_i_minus_one": source(f"foo_{i-1}")} for i in range(1,
NUM_ITERS)}
+ **{f"foo_{i}": {"foo_i_minus_one": source(f"foo_{i - 1}")} for i in
range(1, NUM_ITERS)}
)
def foo_i(foo_i_minus_one: pd.DataFrame) -> pd.DataFrame:
global count