This is an automated email from the ASF dual-hosted git repository.
cswartzvi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/hamilton.git
The following commit(s) were added to refs/heads/main by this push:
new 875555f7 Clean up repo for first ASF release (Attempt 2) (#1374)
875555f7 is described below
commit 875555f7fd74c1de3a58f8de6a38496ca1748f43
Author: Charles Swartz <[email protected]>
AuthorDate: Thu Sep 4 09:48:00 2025 -0400
Clean up repo for first ASF release (Attempt 2) (#1374)
- Improve `uv sync` support (bump minimum python required to `3.8.1`)
- Utilize `dependency-groups` for dev dependencies in `pyproject.toml`
- Remove obsolete files (`setup.py`, `.flake8`, `.style.yapf`)
- Reorganize repo structure (move assets, docs, scripts, and writeups)
- Remove non-existent files from manifest
- Fix various broken tests (code and actions)
---------
Signed-off-by: Charles Swartz <[email protected]>
---
.flake8 | 20 ----
.github/workflows/hamilton-main.yml | 26 ++---
.github/workflows/sphinx-docs.yml | 2 +-
.pre-commit-config.yaml | 4 -
.style.yapf | 23 ----
MANIFEST.in | 2 -
dag.png => assets/dag.png | Bin
assets/dag_example_module.png | Bin 0 -> 27223 bytes
hamiltondag.png => assets/hamiltondag.png | Bin
.../hello_world_image.png | Bin
dag_example_module.png | Bin 27367 -> 0 bytes
README-DOCS.md => docs/README.md | 2 +-
docs/conf.py | 6 +
pyproject.toml | 121 +++++++++++----------
build_conda.sh => scripts/build_conda.sh | 0
setup.py | 49 ---------
tests/plugins/test_polars_extensions.py | 9 +-
basics.md => writeups/basics.md | 0
data_quality.md => writeups/data_quality.md | 0
decorators.md => writeups/decorators.md | 0
developer_setup.md => writeups/developer_setup.md | 4 +-
21 files changed, 92 insertions(+), 176 deletions(-)
diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 276b3f11..00000000
--- a/.flake8
+++ /dev/null
@@ -1,20 +0,0 @@
-[flake8]
-max-line-length = 100
-exclude =
- build/
- .git/
- venv/
-
-ignore =
- # whitespace before ':'
- E203,
- # module level import not at top of file
- E402,
- # line too long
- E501,
- # line break before binary operator
- W503,
- # invalid escape sequence
- W605
- # multiple statements on one line
- E704
diff --git a/.github/workflows/hamilton-main.yml b/.github/workflows/hamilton-main.yml
index d7ef1dae..15495039 100644
--- a/.github/workflows/hamilton-main.yml
+++ b/.github/workflows/hamilton-main.yml
@@ -63,13 +63,13 @@ jobs:
- name: Check linting with pre-commit
if: ${{ runner.os == 'Linux' }}
run: |
- uv sync --extra dev
+ uv sync --dev
uv run pre-commit install
uv run pre-commit run --all-files
- name: Test hamilton main package
run: |
- uv sync --extra test
+ uv sync --group test
uv pip install "kaleido<0.4.0"
uv run pytest tests/ --cov=hamilton --ignore tests/integrations
@@ -77,7 +77,7 @@ jobs:
- name: Test integrations
if: ${{ matrix.python-version == '3.9' }}
run: |
- uv sync --extra test --extra pandera
+ uv sync --group test --extra pandera
uv pip install -r tests/integrations/pandera/requirements.txt
uv pip install dask-expr
uv run pytest tests/integrations
@@ -85,24 +85,24 @@ jobs:
- name: Test integrations
if: ${{ matrix.python-version != '3.9' }}
run: |
- uv sync --extra test --extra pandera
+ uv sync --group test --extra pandera
uv pip install -r tests/integrations/pandera/requirements.txt
uv run pytest tests/integrations
- name: Test pandas
run: |
- uv sync --extra test
+ uv sync --group test
uv run pytest plugin_tests/h_pandas
- name: Test polars
run: |
- uv sync --extra test
+ uv sync --group test
uv pip install polars
uv run pytest plugin_tests/h_polars
- name: Test narwhals
run: |
- uv sync --extra test
+ uv sync --group test
uv pip install polars pandas narwhals
uv run pytest plugin_tests/h_narwhals
@@ -110,7 +110,7 @@ jobs:
# Dask supports >= py3.9
if: ${{ matrix.python-version != '3.8' }}
run: |
- uv sync --extra test --extra dask
+ uv sync --group test --extra dask
uv run pytest plugin_tests/h_dask
- name: Test ray
@@ -119,7 +119,7 @@ jobs:
env:
RAY_ENABLE_UV_RUN_RUNTIME_ENV: 0 # https://github.com/ray-project/ray/issues/53848
run: |
- uv sync --extra test --extra ray
+ uv sync --group test --extra ray
uv run pytest plugin_tests/h_ray
- name: Test pyspark
@@ -129,7 +129,7 @@ jobs:
PYSPARK_SUBMIT_ARGS: "--conf spark.sql.ansi.enabled=false
pyspark-shell"
run: |
sudo apt-get install --no-install-recommends --yes default-jre
- uv sync --extra test --extra pyspark
+ uv sync --group test --extra pyspark
uv pip install 'numpy<2' 'pyspark[connect]' 'grpcio'
uv pip install --no-cache --reinstall --strict 'grpcio-status >=
1.48.1'
uv run pytest plugin_tests/h_spark
@@ -140,7 +140,7 @@ jobs:
env:
PYSPARK_SUBMIT_ARGS: "--conf spark.sql.ansi.enabled=false
pyspark-shell"
run: |
- uv sync --extra test --extra pyspark
+ uv sync --group test --extra pyspark
uv pip install 'numpy<2' 'pyspark[connect]' 'grpcio'
uv pip install --no-cache --reinstall --strict 'grpcio-status >=
1.48.1'
uv run pytest plugin_tests/h_spark
@@ -150,7 +150,7 @@ jobs:
if: ${{ runner.os == 'Linux' && (matrix.python-version == '3.8' ||
matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
run: |
sudo apt-get install --no-install-recommends --yes libpcre3-dev
cargo
- uv sync --extra test --extra vaex
+ uv sync --group test --extra vaex
uv pip install "numpy<2"
uv run pytest plugin_tests/h_vaex
@@ -158,6 +158,6 @@ jobs:
# Vaex supports <= py3.10 and numpy<2
if: ${{ runner.os != 'Linux' && (matrix.python-version == '3.8' ||
matrix.python-version == '3.9' || matrix.python-version == '3.10') }}
run: |
- uv sync --extra test --extra vaex
+ uv sync --group test --extra vaex
uv pip install "numpy<2"
uv run pytest plugin_tests/h_vaex
diff --git a/.github/workflows/sphinx-docs.yml b/.github/workflows/sphinx-docs.yml
index 2ee5966c..95831e7c 100644
--- a/.github/workflows/sphinx-docs.yml
+++ b/.github/workflows/sphinx-docs.yml
@@ -44,7 +44,7 @@ jobs:
- name: Install Sphinx and dependencies
run: |
python -m pip install --upgrade --no-cache-dir sphinx sphinx-rtd-theme
sphinx-simplepdf
- python -m pip install --upgrade --upgrade-strategy only-if-needed
--no-cache-dir .[docs]
+ python -m pip install --group docs --upgrade --upgrade-strategy only-if-needed --no-cache-dir
- name: Build Sphinx documentation
working-directory: ./docs
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c6c29d04..ec7978d9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -25,10 +25,6 @@ repos:
- id: requirements-txt-fixer
# valid python file
- id: check-ast
- - repo: https://github.com/pycqa/flake8
- rev: 7.1.1
- hooks:
- - id: flake8
- repo: local
hooks:
- id: validate-example-notebooks
diff --git a/.style.yapf b/.style.yapf
deleted file mode 100644
index df98eb3d..00000000
--- a/.style.yapf
+++ /dev/null
@@ -1,23 +0,0 @@
-[style]
-based_on_style = google
-
-# max characters per line
-COLUMN_LIMIT = 100
-
-# Put closing brackets on a separate line, dedented, if the bracketed
expression can't fit in a single line
-DEDENT_CLOSING_BRACKETS = true
-
-# Place each dictionary entry onto its own line.
-EACH_DICT_ENTRY_ON_SEPARATE_LINE = true
-
-# Join short lines into one line. E.g., single line if statements.
-JOIN_MULTIPLE_LINES = true
-
-# Insert a blank line before a def or class immediately nested within another
def or class
-BLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true
-
-# Split before arguments if the argument list is terminated by a comma.
-SPLIT_ARGUMENTS_WHEN_COMMA_TERMINATED = true
-
-# If an argument / parameter list is going to be split, then split before the
first argument
-SPLIT_BEFORE_FIRST_ARGUMENT = true
diff --git a/MANIFEST.in b/MANIFEST.in
index 243b5fcf..50cad726 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,3 @@
-include requirements.txt
-include requirements-test.txt
include LICENSE
include *.md
include NOTICE
diff --git a/dag.png b/assets/dag.png
similarity index 100%
rename from dag.png
rename to assets/dag.png
diff --git a/assets/dag_example_module.png b/assets/dag_example_module.png
new file mode 100644
index 00000000..8c75312e
Binary files /dev/null and b/assets/dag_example_module.png differ
diff --git a/hamiltondag.png b/assets/hamiltondag.png
similarity index 100%
rename from hamiltondag.png
rename to assets/hamiltondag.png
diff --git a/hello_world_image.png b/assets/hello_world_image.png
similarity index 100%
rename from hello_world_image.png
rename to assets/hello_world_image.png
diff --git a/dag_example_module.png b/dag_example_module.png
deleted file mode 100644
index 5351fb71..00000000
Binary files a/dag_example_module.png and /dev/null differ
diff --git a/README-DOCS.md b/docs/README.md
similarity index 97%
rename from README-DOCS.md
rename to docs/README.md
index 82473176..d21e7419 100644
--- a/README-DOCS.md
+++ b/docs/README.md
@@ -7,7 +7,7 @@ Instructions for managing documentation on read the docs.
To build locally, you need to run the following -- make sure you're in the
root of the repo:
```bash
-pip install .[docs]
+pip install --group docs
```
and then one of the following to build and view the documents:
```bash
diff --git a/docs/conf.py b/docs/conf.py
index 44681b6b..bcccc0d1 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -76,6 +76,12 @@ nb_mime_priority_overrides = [
["simplepdf", "text/plain", 100],
]
+exclude_patterns = [
+ '_build',
+ 'Thumbs.db',
+ '.DS_Store',
+ 'README.md',
+]
# for the sitemap extension ---
# check if the current commit is tagged as a release (vX.Y.Z) and set the
version
diff --git a/pyproject.toml b/pyproject.toml
index 3a45b94a..932c8b04 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ name = "sf-hamilton"
dynamic = ["version"]
description = "Hamilton, the micro-framework for creating dataframes."
readme = "README.md"
-requires-python = ">=3.8, <4"
+requires-python = ">=3.8.1, <4"
license = {text = "Apache-2.0"}
keywords = ["hamilton"]
authors = [
@@ -41,54 +41,7 @@ dask-dataframe = ["dask[dataframe]"]
dask-diagnostics = ["dask[diagnostics]"]
dask-distributed = ["dask[distributed]"]
datadog = ["ddtrace<3.0"] # Temporary pin until h_ddog.py import is fixed for
>3.0 version
-dev = [
- "pre-commit",
- "ruff==0.5.7", # this should match `.pre-commit-config.yaml`
-]
diskcache = ["diskcache"]
-docs = [
- "sf-hamilton[dev]",
- "alabaster>=0.7,<0.8,!=0.7.5", # read the docs pins
- "commonmark==0.9.1", # read the docs pins
- "dask-expr>=1.1.14; python_version >= '3.10'", # Bugfix only available after
py3.10 https://github.com/dask/dask-expr/pull/1150
- "dask[distributed]",
- "ddtrace<3.0",
- "diskcache",
- # required for all the plugins
- "dlt",
- "furo",
- "gitpython", # Required for parsing git info for generation of data-adapter
docs
- "grpcio-status",
- "lightgbm",
- "lxml",
- "lz4",
- "mlflow",
- "mock==1.0.1", # read the docs pins
- "myst-nb",
- "narwhals",
- "numpy < 2.0.0",
- "packaging",
- "pandera",
- "pillow",
- "polars",
- "pyarrow >= 1.0.0",
- "pydantic >=2.0",
- "pyspark",
- "openlineage-python",
- "PyYAML",
- "ray",
- "readthedocs-sphinx-ext<2.3", # read the docs pins
- "recommonmark==0.5.0", # read the docs pins
- "scikit-learn",
- "slack-sdk",
- "sphinx", # unpinned because myst-parser doesn't break anymore
- "sphinx-autobuild",
- "sphinx-rtd-theme", # read the docs pins
- "sphinx-simplepdf",
- "sphinx-sitemap",
- "tqdm",
- "xgboost",
-]
experiments = [
"fastapi",
"fastui",
@@ -96,9 +49,6 @@ experiments = [
]
lsp = ["sf-hamilton-lsp"]
openlineage = ["openlineage-python"]
-packaging = [
- "build",
-]
pandera = ["pandera"]
pydantic = ["pydantic>=2.0"]
pyspark = [
@@ -109,6 +59,21 @@ ray = ["ray>=2.0.0", "pyarrow"]
rich = ["rich"]
sdk = ["sf-hamilton-sdk"]
slack = ["slack-sdk"]
+
+tqdm = ["tqdm"]
+ui = ["sf-hamilton-ui"]
+
+# vaex -- on >=py3.11 only core part available https://github.com/vaexio/vaex/pull/2331#issuecomment-2437198176
+vaex = [
+ "vaex; python_version <= '3.10'"
+ ]
+visualization = ["graphviz", "networkx"]
+
+[dependency-groups]
+dev = [
+ "pre-commit",
+ "ruff==0.5.7", # this should match `.pre-commit-config.yaml`
+]
test = [
"connectorx<=0.3.2; python_version=='3.8'",
"connectorx; python_version!='3.8'",
@@ -134,7 +99,8 @@ test = [
"polars",
"pyarrow",
"pydantic >=2.0",
- "pyreadstat", # for SPSS data loader
+ "pyreadstat<1.2.8; python_version <= '3.9'", # for SPSS data loader
+ "pyreadstat; python_version > '3.9'", # for SPSS data loader
"pytest",
"pytest-asyncio",
"pytest-cov",
@@ -147,14 +113,49 @@ test = [
"xlsx2csv", # for excel data loader
"xlsxwriter", # Excel export requires 'xlsxwriter'
]
-tqdm = ["tqdm"]
-ui = ["sf-hamilton-ui"]
-
-# vaex -- on >=py3.11 only core part available
https://github.com/vaexio/vaex/pull/2331#issuecomment-2437198176
-vaex = [
- "vaex; python_version <= '3.10'"
- ]
-visualization = ["graphviz", "networkx"]
+docs = [
+ {include-group = "dev"},
+ "alabaster>=0.7,<0.8,!=0.7.5", # read the docs pins
+ "commonmark==0.9.1", # read the docs pins
+ "dask-expr>=1.1.14; python_version >= '3.10'", # Bugfix only available after
py3.10 https://github.com/dask/dask-expr/pull/1150
+ "dask[distributed]",
+ "ddtrace<3.0",
+ "diskcache",
+ # required for all the plugins
+ "dlt",
+ "furo",
+ "gitpython", # Required for parsing git info for generation of data-adapter
docs
+ "grpcio-status",
+ "lightgbm",
+ "lxml",
+ "lz4",
+ "mlflow",
+ "mock==1.0.1", # read the docs pins
+ "myst-nb",
+ "narwhals",
+ "numpy < 2.0.0",
+ "packaging",
+ "pandera",
+ "pillow",
+ "polars",
+ "pyarrow >= 1.0.0",
+ "pydantic >=2.0",
+ "pyspark",
+ "openlineage-python",
+ "PyYAML",
+ "ray",
+ "readthedocs-sphinx-ext<2.3", # read the docs pins
+ "recommonmark==0.5.0", # read the docs pins
+ "scikit-learn",
+ "slack-sdk",
+ "sphinx", # unpinned because myst-parser doesn't break anymore
+ "sphinx-autobuild",
+ "sphinx-rtd-theme", # read the docs pins
+ "sphinx-simplepdf",
+ "sphinx-sitemap",
+ "tqdm",
+ "xgboost",
+]
[project.entry-points.console_scripts]
h_experiments = "hamilton.plugins.h_experiments.__main__:main"
diff --git a/build_conda.sh b/scripts/build_conda.sh
similarity index 100%
rename from build_conda.sh
rename to scripts/build_conda.sh
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 6530bda8..00000000
--- a/setup.py
+++ /dev/null
@@ -1,49 +0,0 @@
-#!/usr/bin/env python
-"""The setup script."""
-
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import warnings
-
-from setuptools import setup
-
-# don't fail if there are problems with the readme (happens within circleci)
-try:
- with open("README.md") as readme_file:
- readme = readme_file.read()
-except Exception:
- warnings.warn("README.md not found") # noqa
- readme = None
-
-
-def get_version():
- version_dict = {}
- with open("hamilton/version.py") as f:
- exec(f.read(), version_dict)
- return ".".join(map(str, version_dict["VERSION"]))
-
-
-VERSION = get_version()
-
-
-setup(
- version=VERSION,
- long_description=readme,
- long_description_content_type="text/markdown",
- zip_safe=False,
-)
diff --git a/tests/plugins/test_polars_extensions.py b/tests/plugins/test_polars_extensions.py
index 796e413c..1bec4494 100644
--- a/tests/plugins/test_polars_extensions.py
+++ b/tests/plugins/test_polars_extensions.py
@@ -201,5 +201,12 @@ def test_polars_spreadsheet(df: pl.DataFrame, tmp_path:
pathlib.Path) -> None:
def test_getting_type_hints_spreadsheetwriter():
"""Tests that types can be resolved at run time."""
- type_hints = typing.get_type_hints(PolarsSpreadsheetWriter)
+
+ local_namespace = {}
+ if sys.version_info.major == 3 and sys.version_info.minor > 8:
+ from polars.selectors import Selector
+
+ local_namespace = {"Selector": Selector}
+
+ type_hints = typing.get_type_hints(PolarsSpreadsheetWriter,
localns=local_namespace)
assert type_hints["workbook"] == typing.Union[Workbook, io.BytesIO,
pathlib.Path, str]
diff --git a/basics.md b/writeups/basics.md
similarity index 100%
rename from basics.md
rename to writeups/basics.md
diff --git a/data_quality.md b/writeups/data_quality.md
similarity index 100%
rename from data_quality.md
rename to writeups/data_quality.md
diff --git a/decorators.md b/writeups/decorators.md
similarity index 100%
rename from decorators.md
rename to writeups/decorators.md
diff --git a/developer_setup.md b/writeups/developer_setup.md
similarity index 97%
rename from developer_setup.md
rename to writeups/developer_setup.md
index 11f6c67f..7a859165 100644
--- a/developer_setup.md
+++ b/writeups/developer_setup.md
@@ -167,8 +167,8 @@ Note: since it is common to have pyenv installed too --
conda and pyenv don't pl
`conda config --set auto_activate_base False` to not set conda to be active by
default.
3. Make sure you have an Anaconda account and are authorized to push to
anaconda.
4. Log in to anaconda (e.g. conda activate && anaconda login).
-5. We have a script `build_conda.sh` that is a bash script that encapsulates
the steps. For reference
+5. We have a script `scripts/build_conda.sh` that is a bash script that
encapsulates the steps. For reference
it roughly follows [this
documentation](https://conda.io/projects/conda-build/en/latest/user-guide/tutorials/build-pkgs-skeleton.html).
-Run it with `bash build_conda.sh`. It should "just work".
+Run it with `bash scripts/build_conda.sh`. It should "just work".
6. Be sure to remove any files it creates afterwards so when you come to do a
release again, you're not uploading the
same files.