This is an automated email from the ASF dual-hosted git repository.
fokko pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg-python.git
The following commit(s) were added to refs/heads/main by this push:
new 0cebec48 Replace `numpy` usage and remove from `pyproject.toml` (#1272)
0cebec48 is described below
commit 0cebec48833f75eeca02b1a965112615b1cbc1c8
Author: Kevin Liu <[email protected]>
AuthorDate: Thu Oct 31 10:49:15 2024 -0400
Replace `numpy` usage and remove from `pyproject.toml` (#1272)
* use random instead of numpy
* remove numpy from pyproject.toml
---
poetry.lock | 10 ++++----
pyproject.toml | 35 ++++------------------------
tests/integration/test_writes/test_writes.py | 16 ++++++-------
3 files changed, 17 insertions(+), 44 deletions(-)
diff --git a/poetry.lock b/poetry.lock
index 2c3e02fd..b144fd16 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -4519,14 +4519,14 @@ cffi = ["cffi (>=1.11)"]
[extras]
adlfs = ["adlfs"]
daft = ["getdaft"]
-duckdb = ["duckdb", "numpy", "pyarrow"]
+duckdb = ["duckdb", "pyarrow"]
dynamodb = ["boto3"]
gcsfs = ["gcsfs"]
glue = ["boto3", "mypy-boto3-glue"]
hive = ["thrift"]
-pandas = ["numpy", "pandas", "pyarrow"]
-pyarrow = ["numpy", "pyarrow"]
-ray = ["numpy", "pandas", "pyarrow", "ray", "ray"]
+pandas = ["pandas", "pyarrow"]
+pyarrow = ["pyarrow"]
+ray = ["pandas", "pyarrow", "ray", "ray"]
s3fs = ["s3fs"]
snappy = ["python-snappy"]
sql-postgres = ["psycopg2-binary", "sqlalchemy"]
@@ -4536,4 +4536,4 @@ zstandard = ["zstandard"]
[metadata]
lock-version = "2.0"
python-versions = "^3.9, <3.13, !=3.9.7"
-content-hash = "c8e9ed26f57ff8c43dde985f66cd30694ec0ac032ed9da9cda375fbe05bd3302"
+content-hash = "9ff6b794eee7db5b198ff9df41d3a3f74eed4d620555dc286e62d33a1b1bb3f0"
diff --git a/pyproject.toml b/pyproject.toml
index eb159463..f3a9bdfe 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,9 +78,6 @@ gcsfs = { version = ">=2023.1.0,<2024.1.0", optional = true }
psycopg2-binary = { version = ">=2.9.6", optional = true }
sqlalchemy = { version = "^2.0.18", optional = true }
getdaft = { version = ">=0.2.12", optional = true }
-numpy = [
- { version = "1.26.0", python = ">=3.9,<3.13", optional = true },
-]
cachetools = "^5.5.0"
[tool.poetry.group.dev.dependencies]
@@ -238,10 +235,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true
-[[tool.mypy.overrides]]
-module = "numpy.*"
-ignore_missing_imports = true
-
[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
@@ -394,10 +387,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true
-[[tool.mypy.overrides]]
-module = "numpy.*"
-ignore_missing_imports = true
-
[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
@@ -550,10 +539,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true
-[[tool.mypy.overrides]]
-module = "numpy.*"
-ignore_missing_imports = true
-
[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
@@ -706,10 +691,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true
-[[tool.mypy.overrides]]
-module = "numpy.*"
-ignore_missing_imports = true
-
[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
@@ -862,10 +843,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true
-[[tool.mypy.overrides]]
-module = "numpy.*"
-ignore_missing_imports = true
-
[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
@@ -894,10 +871,10 @@ generate-setup-file = false
script = "build-module.py"
[tool.poetry.extras]
-pyarrow = ["pyarrow", "numpy"]
-pandas = ["pandas", "pyarrow", "numpy"]
-duckdb = ["duckdb", "pyarrow", "numpy"]
-ray = ["ray", "pyarrow", "pandas", "numpy"]
+pyarrow = ["pyarrow"]
+pandas = ["pandas", "pyarrow"]
+duckdb = ["duckdb", "pyarrow"]
+ray = ["ray", "pyarrow", "pandas"]
daft = ["getdaft"]
snappy = ["python-snappy"]
hive = ["thrift"]
@@ -1084,10 +1061,6 @@ ignore_missing_imports = true
module = "sortedcontainers.*"
ignore_missing_imports = true
-[[tool.mypy.overrides]]
-module = "numpy.*"
-ignore_missing_imports = true
-
[[tool.mypy.overrides]]
module = "sqlalchemy.*"
ignore_missing_imports = true
diff --git a/tests/integration/test_writes/test_writes.py b/tests/integration/test_writes/test_writes.py
index 49c7c2df..01744514 100644
--- a/tests/integration/test_writes/test_writes.py
+++ b/tests/integration/test_writes/test_writes.py
@@ -17,13 +17,13 @@
# pylint:disable=redefined-outer-name
import math
import os
+import random
import time
from datetime import date, datetime, timedelta
from pathlib import Path
from typing import Any, Dict
from urllib.parse import urlparse
-import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.compute as pc
@@ -1373,14 +1373,14 @@ def test_delete_threshold(session_catalog: Catalog) -> None:
date_start, date_end = date(2024, 1, 1), date(2024, 2, 1)
# Generate the 'id' column
- id_column = np.random.randint(id_min, id_max, num_rows)
+ id_column = [random.randint(id_min, id_max) for _ in range(num_rows)]
# Generate the 'created_at' column as dates only
- date_range = pd.date_range(start=date_start, end=date_end, freq="D") # Daily frequency for dates
- created_at_column = np.random.choice(date_range, num_rows) # Convert to string (YYYY-MM-DD format)
+ date_range = pd.date_range(start=date_start, end=date_end, freq="D").to_list() # Daily frequency for dates
+ created_at_column = [random.choice(date_range) for _ in range(num_rows)] # Convert to string (YYYY-MM-DD format)
# Generate the 'relevancy_score' column with a peak around 0.1
- relevancy_score_column = np.random.beta(a=2, b=20, size=num_rows) # Adjusting parameters to peak around 0.1
+ relevancy_score_column = [random.betavariate(2, 20) for _ in range(num_rows)] # Adjusting parameters to peak around 0.1
# Create the dataframe
df = pd.DataFrame({"id": id_column, "created_at": created_at_column, "relevancy_score": relevancy_score_column})
@@ -1403,12 +1403,12 @@ def test_delete_threshold(session_catalog: Catalog) -> None:
@pytest.mark.integration
def test_rewrite_manifest_after_partition_evolution(session_catalog: Catalog) -> None:
- np.random.seed(876)
+ random.seed(876)
N = 1440
d = {
"timestamp": pa.array([datetime(2023, 1, 1, 0, 0, 0) +
timedelta(minutes=i) for i in range(N)]),
- "category": pa.array([np.random.choice(["A", "B", "C"]) for _ in
range(N)]),
- "value": pa.array(np.random.normal(size=N)),
+ "category": pa.array([random.choice(["A", "B", "C"]) for _ in
range(N)]),
+ "value": pa.array([random.gauss(0, 1) for _ in range(N)]),
}
data = pa.Table.from_pydict(d)