This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new e28590a51a Python: Bump to Pydantic v2 (#7782)
e28590a51a is described below
commit e28590a51adfb78ea392e8352be0c14b167d9ebc
Author: Fokko Driesprong <[email protected]>
AuthorDate: Wed Aug 16 19:55:07 2023 +0200
Python: Bump to Pydantic v2 (#7782)
---
python/poetry.lock | 241 +++++++++++++++-------
python/pyiceberg/catalog/__init__.py | 2 +-
python/pyiceberg/catalog/rest.py | 8 +-
python/pyiceberg/cli/output.py | 10 +-
python/pyiceberg/io/pyarrow.py | 2 +-
python/pyiceberg/partitioning.py | 30 ++-
python/pyiceberg/serializers.py | 2 +-
python/pyiceberg/table/__init__.py | 6 +-
python/pyiceberg/table/metadata.py | 187 +++++++++--------
python/pyiceberg/table/snapshots.py | 38 +---
python/pyiceberg/table/sorting.py | 20 +-
python/pyiceberg/transforms.py | 100 ++++------
python/pyiceberg/typedef.py | 45 ++++-
python/pyiceberg/types.py | 342 +++++++++++++++++++-------------
python/pyiceberg/utils/parsing.py | 4 +-
python/pyproject.toml | 6 +-
python/tests/avro/test_reader.py | 2 +-
python/tests/avro/test_writer.py | 2 +-
python/tests/catalog/test_hive.py | 2 +-
python/tests/catalog/test_rest.py | 9 +-
python/tests/cli/test_console.py | 9 +-
python/tests/conftest.py | 4 +-
python/tests/io/test_pyarrow.py | 16 +-
python/tests/table/test_init.py | 11 +-
python/tests/table/test_metadata.py | 62 ++----
python/tests/table/test_partitioning.py | 15 +-
python/tests/table/test_snapshots.py | 27 +--
python/tests/table/test_sorting.py | 8 +-
python/tests/test_integration.py | 2 +-
python/tests/test_schema.py | 14 +-
python/tests/test_transforms.py | 48 +++--
python/tests/test_types.py | 119 +++++------
32 files changed, 802 insertions(+), 591 deletions(-)
diff --git a/python/poetry.lock b/python/poetry.lock
index 1a99547c65..7063b0dd11 100644
--- a/python/poetry.lock
+++ b/python/poetry.lock
@@ -179,6 +179,20 @@ files = [
[package.dependencies]
frozenlist = ">=1.1.0"
+[[package]]
+name = "annotated-types"
+version = "0.5.0"
+description = "Reusable constraint types to use with typing.Annotated"
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "annotated_types-0.5.0-py3-none-any.whl", hash =
"sha256:58da39888f92c276ad970249761ebea80ba544b77acddaa1a4d6cf78287d45fd"},
+ {file = "annotated_types-0.5.0.tar.gz", hash =
"sha256:47cdc3490d9ac1506ce92c7aaa76c579dc3509ff11e098fc867e5130ab7be802"},
+]
+
+[package.dependencies]
+typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""}
+
[[package]]
name = "async-timeout"
version = "4.0.3"
@@ -210,19 +224,19 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy
(>=1.1.1)", "pympler", "pyte
[[package]]
name = "azure-core"
-version = "1.29.1"
+version = "1.29.2"
description = "Microsoft Azure Core Library for Python"
optional = true
python-versions = ">=3.7"
files = [
- {file = "azure-core-1.29.1.zip", hash =
"sha256:68e5bb6e3a3230ec202001cc5cb88e57f11c441c8345e921a9ffb8c370abf936"},
- {file = "azure_core-1.29.1-py3-none-any.whl", hash =
"sha256:6bcefa1f70ff7bf3c39c07c73d8a21df73288eff7e6a1031eb8cfae71cc7bed4"},
+ {file = "azure-core-1.29.2.zip", hash =
"sha256:beb0fe88d1043d8457318e8fb841d9caa648211092eda213c16b376401f3710d"},
+ {file = "azure_core-1.29.2-py3-none-any.whl", hash =
"sha256:8e6602f322dc1070caf7e17754beb53b69ffa09df0f4786009a3107e9a00c793"},
]
[package.dependencies]
requests = ">=2.18.4"
six = ">=1.11.0"
-typing-extensions = ">=4.3.0"
+typing-extensions = ">=4.6.0"
[package.extras]
aio = ["aiohttp (>=3.0)"]
@@ -834,13 +848,13 @@ files = [
[[package]]
name = "exceptiongroup"
-version = "1.1.2"
+version = "1.1.3"
description = "Backport of PEP 654 (exception groups)"
optional = false
python-versions = ">=3.7"
files = [
- {file = "exceptiongroup-1.1.2-py3-none-any.whl", hash =
"sha256:e346e69d186172ca7cf029c8c1d16235aa0e04035e5750b4b95039e65204328f"},
- {file = "exceptiongroup-1.1.2.tar.gz", hash =
"sha256:12c3e887d6485d16943a309616de20ae5582633e0a2eda17f4e10fd61c1e8af5"},
+ {file = "exceptiongroup-1.1.3-py3-none-any.whl", hash =
"sha256:343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3"},
+ {file = "exceptiongroup-1.1.3.tar.gz", hash =
"sha256:097acd85d473d75af5bb98e41b61ff7fe35efe6675e4f9370ec6ec5126d160e9"},
]
[package.extras]
@@ -2297,55 +2311,135 @@ files = [
[[package]]
name = "pydantic"
-version = "1.10.12"
-description = "Data validation and settings management using python type hints"
+version = "2.1.1"
+description = "Data validation using Python type hints"
optional = false
python-versions = ">=3.7"
files = [
- {file = "pydantic-1.10.12-cp310-cp310-macosx_10_9_x86_64.whl", hash =
"sha256:a1fcb59f2f355ec350073af41d927bf83a63b50e640f4dbaa01053a28b7a7718"},
- {file = "pydantic-1.10.12-cp310-cp310-macosx_11_0_arm64.whl", hash =
"sha256:b7ccf02d7eb340b216ec33e53a3a629856afe1c6e0ef91d84a4e6f2fb2ca70fe"},
- {file =
"pydantic-1.10.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:8fb2aa3ab3728d950bcc885a2e9eff6c8fc40bc0b7bb434e555c215491bcf48b"},
- {file =
"pydantic-1.10.12-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
hash =
"sha256:771735dc43cf8383959dc9b90aa281f0b6092321ca98677c5fb6125a6f56d58d"},
- {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_i686.whl", hash =
"sha256:ca48477862372ac3770969b9d75f1bf66131d386dba79506c46d75e6b48c1e09"},
- {file = "pydantic-1.10.12-cp310-cp310-musllinux_1_1_x86_64.whl", hash =
"sha256:a5e7add47a5b5a40c49b3036d464e3c7802f8ae0d1e66035ea16aa5b7a3923ed"},
- {file = "pydantic-1.10.12-cp310-cp310-win_amd64.whl", hash =
"sha256:e4129b528c6baa99a429f97ce733fff478ec955513630e61b49804b6cf9b224a"},
- {file = "pydantic-1.10.12-cp311-cp311-macosx_10_9_x86_64.whl", hash =
"sha256:b0d191db0f92dfcb1dec210ca244fdae5cbe918c6050b342d619c09d31eea0cc"},
- {file = "pydantic-1.10.12-cp311-cp311-macosx_11_0_arm64.whl", hash =
"sha256:795e34e6cc065f8f498c89b894a3c6da294a936ee71e644e4bd44de048af1405"},
- {file =
"pydantic-1.10.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:69328e15cfda2c392da4e713443c7dbffa1505bc9d566e71e55abe14c97ddc62"},
- {file =
"pydantic-1.10.12-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
hash =
"sha256:2031de0967c279df0d8a1c72b4ffc411ecd06bac607a212892757db7462fc494"},
- {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_i686.whl", hash =
"sha256:ba5b2e6fe6ca2b7e013398bc7d7b170e21cce322d266ffcd57cca313e54fb246"},
- {file = "pydantic-1.10.12-cp311-cp311-musllinux_1_1_x86_64.whl", hash =
"sha256:2a7bac939fa326db1ab741c9d7f44c565a1d1e80908b3797f7f81a4f86bc8d33"},
- {file = "pydantic-1.10.12-cp311-cp311-win_amd64.whl", hash =
"sha256:87afda5539d5140cb8ba9e8b8c8865cb5b1463924d38490d73d3ccfd80896b3f"},
- {file = "pydantic-1.10.12-cp37-cp37m-macosx_10_9_x86_64.whl", hash =
"sha256:549a8e3d81df0a85226963611950b12d2d334f214436a19537b2efed61b7639a"},
- {file =
"pydantic-1.10.12-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:598da88dfa127b666852bef6d0d796573a8cf5009ffd62104094a4fe39599565"},
- {file =
"pydantic-1.10.12-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
hash =
"sha256:ba5c4a8552bff16c61882db58544116d021d0b31ee7c66958d14cf386a5b5350"},
- {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_i686.whl", hash =
"sha256:c79e6a11a07da7374f46970410b41d5e266f7f38f6a17a9c4823db80dadf4303"},
- {file = "pydantic-1.10.12-cp37-cp37m-musllinux_1_1_x86_64.whl", hash =
"sha256:ab26038b8375581dc832a63c948f261ae0aa21f1d34c1293469f135fa92972a5"},
- {file = "pydantic-1.10.12-cp37-cp37m-win_amd64.whl", hash =
"sha256:e0a16d274b588767602b7646fa05af2782576a6cf1022f4ba74cbb4db66f6ca8"},
- {file = "pydantic-1.10.12-cp38-cp38-macosx_10_9_x86_64.whl", hash =
"sha256:6a9dfa722316f4acf4460afdf5d41d5246a80e249c7ff475c43a3a1e9d75cf62"},
- {file = "pydantic-1.10.12-cp38-cp38-macosx_11_0_arm64.whl", hash =
"sha256:a73f489aebd0c2121ed974054cb2759af8a9f747de120acd2c3394cf84176ccb"},
- {file =
"pydantic-1.10.12-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:6b30bcb8cbfccfcf02acb8f1a261143fab622831d9c0989707e0e659f77a18e0"},
- {file =
"pydantic-1.10.12-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
hash =
"sha256:2fcfb5296d7877af406ba1547dfde9943b1256d8928732267e2653c26938cd9c"},
- {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_i686.whl", hash =
"sha256:2f9a6fab5f82ada41d56b0602606a5506aab165ca54e52bc4545028382ef1c5d"},
- {file = "pydantic-1.10.12-cp38-cp38-musllinux_1_1_x86_64.whl", hash =
"sha256:dea7adcc33d5d105896401a1f37d56b47d443a2b2605ff8a969a0ed5543f7e33"},
- {file = "pydantic-1.10.12-cp38-cp38-win_amd64.whl", hash =
"sha256:1eb2085c13bce1612da8537b2d90f549c8cbb05c67e8f22854e201bde5d98a47"},
- {file = "pydantic-1.10.12-cp39-cp39-macosx_10_9_x86_64.whl", hash =
"sha256:ef6c96b2baa2100ec91a4b428f80d8f28a3c9e53568219b6c298c1125572ebc6"},
- {file = "pydantic-1.10.12-cp39-cp39-macosx_11_0_arm64.whl", hash =
"sha256:6c076be61cd0177a8433c0adcb03475baf4ee91edf5a4e550161ad57fc90f523"},
- {file =
"pydantic-1.10.12-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:2d5a58feb9a39f481eda4d5ca220aa8b9d4f21a41274760b9bc66bfd72595b86"},
- {file =
"pydantic-1.10.12-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl",
hash =
"sha256:e5f805d2d5d0a41633651a73fa4ecdd0b3d7a49de4ec3fadf062fe16501ddbf1"},
- {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_i686.whl", hash =
"sha256:1289c180abd4bd4555bb927c42ee42abc3aee02b0fb2d1223fb7c6e5bef87dbe"},
- {file = "pydantic-1.10.12-cp39-cp39-musllinux_1_1_x86_64.whl", hash =
"sha256:5d1197e462e0364906cbc19681605cb7c036f2475c899b6f296104ad42b9f5fb"},
- {file = "pydantic-1.10.12-cp39-cp39-win_amd64.whl", hash =
"sha256:fdbdd1d630195689f325c9ef1a12900524dceb503b00a987663ff4f58669b93d"},
- {file = "pydantic-1.10.12-py3-none-any.whl", hash =
"sha256:b749a43aa51e32839c9d71dc67eb1e4221bb04af1033a32e3923d46f9effa942"},
- {file = "pydantic-1.10.12.tar.gz", hash =
"sha256:0fe8a415cea8f340e7a9af9c54fc71a649b43e8ca3cc732986116b3cb135d303"},
+ {file = "pydantic-2.1.1-py3-none-any.whl", hash =
"sha256:43bdbf359d6304c57afda15c2b95797295b702948082d4c23851ce752f21da70"},
+ {file = "pydantic-2.1.1.tar.gz", hash =
"sha256:22d63db5ce4831afd16e7c58b3192d3faf8f79154980d9397d9867254310ba4b"},
]
[package.dependencies]
-typing-extensions = ">=4.2.0"
+annotated-types = ">=0.4.0"
+pydantic-core = "2.4.0"
+typing-extensions = ">=4.6.1"
[package.extras]
-dotenv = ["python-dotenv (>=0.10.4)"]
-email = ["email-validator (>=1.0.3)"]
+email = ["email-validator (>=2.0.0)"]
+
+[[package]]
+name = "pydantic-core"
+version = "2.4.0"
+description = ""
+optional = false
+python-versions = ">=3.7"
+files = [
+ {file = "pydantic_core-2.4.0-cp310-cp310-macosx_10_7_x86_64.whl", hash =
"sha256:2ca4687dd996bde7f3c420def450797feeb20dcee2b9687023e3323c73fc14a2"},
+ {file = "pydantic_core-2.4.0-cp310-cp310-macosx_11_0_arm64.whl", hash =
"sha256:782fced7d61469fd1231b184a80e4f2fa7ad54cd7173834651a453f96f29d673"},
+ {file =
"pydantic_core-2.4.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:6213b471b68146af97b8551294e59e7392c2117e28ffad9c557c65087f4baee3"},
+ {file =
"pydantic_core-2.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:63797499a219d8e81eb4e0c42222d0a4c8ec896f5c76751d4258af95de41fdf1"},
+ {file = "pydantic_core-2.4.0-cp310-cp310-manylinux_2_24_armv7l.whl", hash
= "sha256:0455876d575a35defc4da7e0a199596d6c773e20d3d42fa1fc29f6aa640369ed"},
+ {file = "pydantic_core-2.4.0-cp310-cp310-manylinux_2_24_ppc64le.whl", hash
= "sha256:8c938c96294d983dcf419b54dba2d21056959c22911d41788efbf949a29ae30d"},
+ {file = "pydantic_core-2.4.0-cp310-cp310-manylinux_2_24_s390x.whl", hash =
"sha256:878a5017d93e776c379af4e7b20f173c82594d94fa073059bcc546789ad50bf8"},
+ {file =
"pydantic_core-2.4.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash
= "sha256:69159afc2f2dc43285725f16143bc5df3c853bc1cb7df6021fce7ef1c69e8171"},
+ {file = "pydantic_core-2.4.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash
= "sha256:54df7df399b777c1fd144f541c95d351b3aa110535a6810a6a569905d106b6f3"},
+ {file = "pydantic_core-2.4.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash =
"sha256:e412607ca89a0ced10758dfb8f9adcc365ce4c1c377e637c01989a75e9a9ec8a"},
+ {file = "pydantic_core-2.4.0-cp310-none-win32.whl", hash =
"sha256:853f103e2b9a58832fdd08a587a51de8b552ae90e1a5d167f316b7eabf8d7dde"},
+ {file = "pydantic_core-2.4.0-cp310-none-win_amd64.whl", hash =
"sha256:3ba2c9c94a9176f6321a879c8b864d7c5b12d34f549a4c216c72ce213d7d953c"},
+ {file = "pydantic_core-2.4.0-cp311-cp311-macosx_10_7_x86_64.whl", hash =
"sha256:a8b7acd04896e8f161e1500dc5f218017db05c1d322f054e89cbd089ce5d0071"},
+ {file = "pydantic_core-2.4.0-cp311-cp311-macosx_11_0_arm64.whl", hash =
"sha256:16468bd074fa4567592d3255bf25528ed41e6b616d69bf07096bdb5b66f947d1"},
+ {file =
"pydantic_core-2.4.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:cba5ad5eef02c86a1f3da00544cbc59a510d596b27566479a7cd4d91c6187a11"},
+ {file =
"pydantic_core-2.4.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:b7206e41e04b443016e930e01685bab7a308113c0b251b3f906942c8d4b48fcb"},
+ {file = "pydantic_core-2.4.0-cp311-cp311-manylinux_2_24_armv7l.whl", hash
= "sha256:c1375025f0bfc9155286ebae8eecc65e33e494c90025cda69e247c3ccd2bab00"},
+ {file = "pydantic_core-2.4.0-cp311-cp311-manylinux_2_24_ppc64le.whl", hash
= "sha256:3534118289e33130ed3f1cc487002e8d09b9f359be48b02e9cd3de58ce58fba9"},
+ {file = "pydantic_core-2.4.0-cp311-cp311-manylinux_2_24_s390x.whl", hash =
"sha256:94d2b36a74623caab262bf95f0e365c2c058396082bd9d6a9e825657d0c1e7fa"},
+ {file =
"pydantic_core-2.4.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash
= "sha256:af24ad4fbaa5e4a2000beae0c3b7fd1c78d7819ab90f9370a1cfd8998e3f8a3c"},
+ {file = "pydantic_core-2.4.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash
= "sha256:bf10963d8aed8bbe0165b41797c9463d4c5c8788ae6a77c68427569be6bead41"},
+ {file = "pydantic_core-2.4.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash =
"sha256:68199ada7c310ddb8c76efbb606a0de656b40899388a7498954f423e03fc38be"},
+ {file = "pydantic_core-2.4.0-cp311-none-win32.whl", hash =
"sha256:6f855bcc96ed3dd56da7373cfcc9dcbabbc2073cac7f65c185772d08884790ce"},
+ {file = "pydantic_core-2.4.0-cp311-none-win_amd64.whl", hash =
"sha256:de39eb3bab93a99ddda1ac1b9aa331b944d8bcc4aa9141148f7fd8ee0299dafc"},
+ {file = "pydantic_core-2.4.0-cp312-cp312-macosx_10_7_x86_64.whl", hash =
"sha256:f773b39780323a0499b53ebd91a28ad11cde6705605d98d999dfa08624caf064"},
+ {file = "pydantic_core-2.4.0-cp312-cp312-macosx_11_0_arm64.whl", hash =
"sha256:a297c0d6c61963c5c3726840677b798ca5b7dfc71bc9c02b9a4af11d23236008"},
+ {file =
"pydantic_core-2.4.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:546064c55264156b973b5e65e5fafbe5e62390902ce3cf6b4005765505e8ff56"},
+ {file =
"pydantic_core-2.4.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:36ba9e728588588f0196deaf6751b9222492331b5552f865a8ff120869d372e0"},
+ {file = "pydantic_core-2.4.0-cp312-cp312-manylinux_2_24_armv7l.whl", hash
= "sha256:57a53a75010c635b3ad6499e7721eaa3b450e03f6862afe2dbef9c8f66e46ec8"},
+ {file = "pydantic_core-2.4.0-cp312-cp312-manylinux_2_24_ppc64le.whl", hash
= "sha256:4b262bbc13022f2097c48a21adcc360a81d83dc1d854c11b94953cd46d7d3c07"},
+ {file = "pydantic_core-2.4.0-cp312-cp312-manylinux_2_24_s390x.whl", hash =
"sha256:01947ad728f426fa07fcb26457ebf90ce29320259938414bc0edd1476e75addb"},
+ {file =
"pydantic_core-2.4.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash
= "sha256:b2799c2eaf182769889761d4fb4d78b82bc47dae833799fedbf69fc7de306faa"},
+ {file = "pydantic_core-2.4.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash
= "sha256:a08fd490ba36d1fbb2cd5dcdcfb9f3892deb93bd53456724389135712b5fc735"},
+ {file = "pydantic_core-2.4.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash =
"sha256:1e8a7c62d15a5c4b307271e4252d76ebb981d6251c6ecea4daf203ef0179ea4f"},
+ {file = "pydantic_core-2.4.0-cp312-none-win32.whl", hash =
"sha256:9206c14a67c38de7b916e486ae280017cf394fa4b1aa95cfe88621a4e1d79725"},
+ {file = "pydantic_core-2.4.0-cp312-none-win_amd64.whl", hash =
"sha256:884235507549a6b2d3c4113fb1877ae263109e787d9e0eb25c35982ab28d0399"},
+ {file = "pydantic_core-2.4.0-cp37-cp37m-macosx_10_7_x86_64.whl", hash =
"sha256:4cbe929efa77a806e8f1a97793f2dc3ea3475ae21a9ed0f37c21320fe93f6f50"},
+ {file = "pydantic_core-2.4.0-cp37-cp37m-macosx_11_0_arm64.whl", hash =
"sha256:9137289de8fe845c246a8c3482dd0cb40338846ba683756d8f489a4bd8fddcae"},
+ {file =
"pydantic_core-2.4.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:c5d8e764b5646623e57575f624f8ebb8f7a9f7fd1fae682ef87869ca5fec8dcf"},
+ {file =
"pydantic_core-2.4.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:8fba0aff4c407d0274e43697e785bcac155ad962be57518d1c711f45e72da70f"},
+ {file = "pydantic_core-2.4.0-cp37-cp37m-manylinux_2_24_armv7l.whl", hash =
"sha256:30527d173e826f2f7651f91c821e337073df1555e3b5a0b7b1e2c39e26e50678"},
+ {file = "pydantic_core-2.4.0-cp37-cp37m-manylinux_2_24_ppc64le.whl", hash
= "sha256:bd7d1dde70ff3e09e4bc7a1cbb91a7a538add291bfd5b3e70ef1e7b45192440f"},
+ {file = "pydantic_core-2.4.0-cp37-cp37m-manylinux_2_24_s390x.whl", hash =
"sha256:72f1216ca8cef7b8adacd4c4c6b89c3b0c4f97503197f5284c80f36d6e4edd30"},
+ {file =
"pydantic_core-2.4.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash =
"sha256:b013c7861a7c7bfcec48fd709513fea6f9f31727e7a0a93ca0dd12e056740717"},
+ {file = "pydantic_core-2.4.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash =
"sha256:478f5f6d7e32bd4a04d102160efb2d389432ecf095fe87c555c0a6fc4adfc1a4"},
+ {file = "pydantic_core-2.4.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash =
"sha256:d9610b47b5fe4aacbbba6a9cb5f12cbe864eec99dbfed5710bd32ef5dd8a5d5b"},
+ {file = "pydantic_core-2.4.0-cp37-none-win32.whl", hash =
"sha256:ff246c0111076c8022f9ba325c294f2cb5983403506989253e04dbae565e019b"},
+ {file = "pydantic_core-2.4.0-cp37-none-win_amd64.whl", hash =
"sha256:d0c2b713464a8e263a243ae7980d81ce2de5ac59a9f798a282e44350b42dc516"},
+ {file = "pydantic_core-2.4.0-cp38-cp38-macosx_10_7_x86_64.whl", hash =
"sha256:12ef6838245569fd60a179fade81ca4b90ae2fa0ef355d616f519f7bb27582db"},
+ {file = "pydantic_core-2.4.0-cp38-cp38-macosx_11_0_arm64.whl", hash =
"sha256:49db206eb8fdc4b4f30e6e3e410584146d813c151928f94ec0db06c4f2595538"},
+ {file =
"pydantic_core-2.4.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:0a507d7fa44688bbac76af6521e488b3da93de155b9cba6f2c9b7833ce243d59"},
+ {file =
"pydantic_core-2.4.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:ffe18407a4d000c568182ce5388bbbedeb099896904e43fc14eee76cfae6dec5"},
+ {file = "pydantic_core-2.4.0-cp38-cp38-manylinux_2_24_armv7l.whl", hash =
"sha256:fa8e48001b39d54d97d7b380a0669fa99fc0feeb972e35a2d677ba59164a9a22"},
+ {file = "pydantic_core-2.4.0-cp38-cp38-manylinux_2_24_ppc64le.whl", hash =
"sha256:394f12a2671ff8c4dfa2e85be6c08be0651ad85bc1e6aa9c77c21671baaf28cd"},
+ {file = "pydantic_core-2.4.0-cp38-cp38-manylinux_2_24_s390x.whl", hash =
"sha256:2f9ea0355f90db2a76af530245fa42f04d98f752a1236ed7c6809ec484560d5b"},
+ {file =
"pydantic_core-2.4.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash =
"sha256:61d4e713f467abcdd59b47665d488bb898ad3dd47ce7446522a50e0cbd8e8279"},
+ {file = "pydantic_core-2.4.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash =
"sha256:453862ab268f6326b01f067ed89cb3a527d34dc46f6f4eeec46a15bbc706d0da"},
+ {file = "pydantic_core-2.4.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash =
"sha256:56a85fa0dab1567bd0cac10f0c3837b03e8a0d939e6a8061a3a420acd97e9421"},
+ {file = "pydantic_core-2.4.0-cp38-none-win32.whl", hash =
"sha256:0d726108c1c0380b88b6dd4db559f0280e0ceda9e077f46ff90bc85cd4d03e77"},
+ {file = "pydantic_core-2.4.0-cp38-none-win_amd64.whl", hash =
"sha256:047580388644c473b934d27849f8ed8dbe45df0adb72104e78b543e13bf69762"},
+ {file = "pydantic_core-2.4.0-cp39-cp39-macosx_10_7_x86_64.whl", hash =
"sha256:867d3eea954bea807cabba83cfc939c889a18576d66d197c60025b15269d7cc0"},
+ {file = "pydantic_core-2.4.0-cp39-cp39-macosx_11_0_arm64.whl", hash =
"sha256:664402ef0c238a7f8a46efb101789d5f2275600fb18114446efec83cfadb5b66"},
+ {file =
"pydantic_core-2.4.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:64e8012ad60a5f0da09ed48725e6e923d1be25f2f091a640af6079f874663813"},
+ {file =
"pydantic_core-2.4.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:ac2b680de398f293b68183317432b3d67ab3faeba216aec18de0c395cb5e3060"},
+ {file = "pydantic_core-2.4.0-cp39-cp39-manylinux_2_24_armv7l.whl", hash =
"sha256:8efc1be43b036c2b6bcfb1451df24ee0ddcf69c31351003daf2699ed93f5687b"},
+ {file = "pydantic_core-2.4.0-cp39-cp39-manylinux_2_24_ppc64le.whl", hash =
"sha256:d93aedbc4614cc21b9ab0d0c4ccd7143354c1f7cffbbe96ae5216ad21d1b21b5"},
+ {file = "pydantic_core-2.4.0-cp39-cp39-manylinux_2_24_s390x.whl", hash =
"sha256:af788b64e13d52fc3600a68b16d31fa8d8573e3ff2fc9a38f8a60b8d94d1f012"},
+ {file =
"pydantic_core-2.4.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash =
"sha256:97c6349c81cee2e69ef59eba6e6c08c5936e6b01c2d50b9e4ac152217845ae09"},
+ {file = "pydantic_core-2.4.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash =
"sha256:cc086ddb6dc654a15deeed1d1f2bcb1cb924ebd70df9dca738af19f64229b06c"},
+ {file = "pydantic_core-2.4.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash =
"sha256:e953353180bec330c3b830891d260b6f8e576e2d18db3c78d314e56bb2276066"},
+ {file = "pydantic_core-2.4.0-cp39-none-win32.whl", hash =
"sha256:6feb4b64d11d5420e517910d60a907d08d846cacaf4e029668725cd21d16743c"},
+ {file = "pydantic_core-2.4.0-cp39-none-win_amd64.whl", hash =
"sha256:153a61ac4030fa019b70b31fb7986461119230d3ba0ab661c757cfea652f4332"},
+ {file = "pydantic_core-2.4.0-pp310-pypy310_pp73-macosx_10_7_x86_64.whl",
hash =
"sha256:3fcf529382b282a30b466bd7af05be28e22aa620e016135ac414f14e1ee6b9e1"},
+ {file =
"pydantic_core-2.4.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:2edef05b63d82568b877002dc4cb5cc18f8929b59077120192df1e03e0c633f8"},
+ {file =
"pydantic_core-2.4.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:da055a1b0bfa8041bb2ff586b2cb0353ed03944a3472186a02cc44a557a0e661"},
+ {file =
"pydantic_core-2.4.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl",
hash =
"sha256:77dadc764cf7c5405e04866181c5bd94a447372a9763e473abb63d1dfe9b7387"},
+ {file =
"pydantic_core-2.4.0-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash =
"sha256:a4ea23b07f29487a7bef2a869f68c7ee0e05424d81375ce3d3de829314c6b5ec"},
+ {file = "pydantic_core-2.4.0-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl",
hash =
"sha256:382f0baa044d674ad59455a5eff83d7965572b745cc72df35c52c2ce8c731d37"},
+ {file = "pydantic_core-2.4.0-pp310-pypy310_pp73-win_amd64.whl", hash =
"sha256:08f89697625e453421401c7f661b9d1eb4c9e4c0a12fd256eeb55b06994ac6af"},
+ {file = "pydantic_core-2.4.0-pp37-pypy37_pp73-macosx_10_7_x86_64.whl",
hash =
"sha256:43a405ce520b45941df9ff55d0cd09762017756a7b413bbad3a6e8178e64a2c2"},
+ {file =
"pydantic_core-2.4.0-pp37-pypy37_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:584a7a818c84767af16ce8bda5d4f7fedb37d3d231fc89928a192f567e4ef685"},
+ {file =
"pydantic_core-2.4.0-pp37-pypy37_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:04922fea7b13cd480586fa106345fe06e43220b8327358873c22d8dfa7a711c7"},
+ {file =
"pydantic_core-2.4.0-pp37-pypy37_pp73-manylinux_2_5_i686.manylinux1_i686.whl",
hash =
"sha256:17156abac20a9feed10feec867fddd91a80819a485b0107fe61f09f2117fe5f3"},
+ {file = "pydantic_core-2.4.0-pp37-pypy37_pp73-musllinux_1_1_aarch64.whl",
hash =
"sha256:4e562cc63b04636cde361fd47569162f1daa94c759220ff202a8129902229114"},
+ {file = "pydantic_core-2.4.0-pp37-pypy37_pp73-musllinux_1_1_x86_64.whl",
hash =
"sha256:90f3785146f701e053bb6b9e8f53acce2c919aca91df88bd4975be0cb926eb41"},
+ {file = "pydantic_core-2.4.0-pp37-pypy37_pp73-win_amd64.whl", hash =
"sha256:e40b1e97edd3dc127aa53d8a5e539a3d0c227d71574d3f9ac1af02d58218a122"},
+ {file = "pydantic_core-2.4.0-pp38-pypy38_pp73-macosx_10_7_x86_64.whl",
hash =
"sha256:b27f3e67f6e031f6620655741b7d0d6bebea8b25d415924b3e8bfef2dd7bd841"},
+ {file =
"pydantic_core-2.4.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:be86c2eb12fb0f846262ace9d8f032dc6978b8cb26a058920ecb723dbcb87d05"},
+ {file =
"pydantic_core-2.4.0-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:4665f7ed345012a8d2eddf4203ef145f5f56a291d010382d235b94e91813f88a"},
+ {file =
"pydantic_core-2.4.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl",
hash =
"sha256:79262be5a292d1df060f29b9a7cdd66934801f987a817632d7552534a172709a"},
+ {file = "pydantic_core-2.4.0-pp38-pypy38_pp73-musllinux_1_1_aarch64.whl",
hash =
"sha256:5fd905a69ac74eaba5041e21a1e8b1a479dab2b41c93bdcc4c1cede3c12a8d86"},
+ {file = "pydantic_core-2.4.0-pp38-pypy38_pp73-musllinux_1_1_x86_64.whl",
hash =
"sha256:2ad538b7e07343001934417cdc8584623b4d8823c5b8b258e75ec8d327cec969"},
+ {file = "pydantic_core-2.4.0-pp38-pypy38_pp73-win_amd64.whl", hash =
"sha256:dd2429f7635ad4857b5881503f9c310be7761dc681c467a9d27787b674d1250a"},
+ {file = "pydantic_core-2.4.0-pp39-pypy39_pp73-macosx_10_7_x86_64.whl",
hash =
"sha256:efff8b6761a1f6e45cebd1b7a6406eb2723d2d5710ff0d1b624fe11313693989"},
+ {file =
"pydantic_core-2.4.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
hash =
"sha256:32a1e0352558cd7ccc014ffe818c7d87b15ec6145875e2cc5fa4bb7351a1033d"},
+ {file =
"pydantic_core-2.4.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
hash =
"sha256:a027f41c5008571314861744d83aff75a34cf3a07022e0be32b214a5bc93f7f1"},
+ {file =
"pydantic_core-2.4.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl",
hash =
"sha256:1927f0e15d190f11f0b8344373731e28fd774c6d676d8a6cfadc95c77214a48b"},
+ {file = "pydantic_core-2.4.0-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl",
hash =
"sha256:7aa82d483d5fb867d4fb10a138ffd57b0f1644e99f2f4f336e48790ada9ada5e"},
+ {file = "pydantic_core-2.4.0-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl",
hash =
"sha256:b85778308bf945e9b33ac604e6793df9b07933108d20bdf53811bc7c2798a4af"},
+ {file = "pydantic_core-2.4.0-pp39-pypy39_pp73-win_amd64.whl", hash =
"sha256:3ded19dcaefe2f6706d81e0db787b59095f4ad0fbadce1edffdf092294c8a23f"},
+ {file = "pydantic_core-2.4.0.tar.gz", hash =
"sha256:ec3473c9789cc00c7260d840c3db2c16dbfc816ca70ec87a00cddfa3e1a1cdd5"},
+]
+
+[package.dependencies]
+typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0"
[[package]]
name = "pygments"
@@ -2624,30 +2718,35 @@ files = [
[[package]]
name = "ray"
-version = "2.6.2"
+version = "2.6.3"
description = "Ray provides a simple, universal API for building distributed
applications."
optional = true
python-versions = "*"
files = [
- {file = "ray-2.6.2-cp310-cp310-macosx_10_15_x86_64.whl", hash =
"sha256:b13eb652e5ad9f4b0a4cb5a52da416d7a8f9686d6de317d1e3fe3486aaf78ce0"},
- {file = "ray-2.6.2-cp310-cp310-macosx_11_0_arm64.whl", hash =
"sha256:120631ce1e6206ceb0395aeca5c1dcef4b0b65dd0a0cd53053e131254da96f0a"},
- {file = "ray-2.6.2-cp310-cp310-manylinux2014_aarch64.whl", hash =
"sha256:8aeef75db0d9922f69a6ab31ffd3df3f40fff404c30bfb4ca76e480c53df98ce"},
- {file = "ray-2.6.2-cp310-cp310-manylinux2014_x86_64.whl", hash =
"sha256:050d3c2ac71a8ca7779c7b590a91400f45e071b298b67727949ffdcc096406e0"},
- {file = "ray-2.6.2-cp311-cp311-macosx_10_15_x86_64.whl", hash =
"sha256:2356454ec63135b6dee3e46b091c76b7daec09bae05aa943a767f884377acda8"},
- {file = "ray-2.6.2-cp311-cp311-macosx_11_0_arm64.whl", hash =
"sha256:e924173198e2d37e29baa7b7893221594139442e9a0b0334fa80f1c8a7b5a6ed"},
- {file = "ray-2.6.2-cp311-cp311-manylinux2014_aarch64.whl", hash =
"sha256:7ee8afffa1c971a71570b6a98de0c69d83c99423a97d29000aeabb706cc1baab"},
- {file = "ray-2.6.2-cp311-cp311-manylinux2014_x86_64.whl", hash =
"sha256:2fa25cf5071082c386dbc086f917f6a4a9f29980eff2cb94d939fd23dd16735a"},
- {file = "ray-2.6.2-cp37-cp37m-macosx_10_15_x86_64.whl", hash =
"sha256:8ecac78203045ef33236ec913775fe9f30d5f3ebe7ee090276c05a3a1fc1ba31"},
- {file = "ray-2.6.2-cp37-cp37m-manylinux2014_aarch64.whl", hash =
"sha256:267b52ed13422f9bc57a7ae89289d2cdb6174fd27c1ea842a2618b2d054cf795"},
- {file = "ray-2.6.2-cp37-cp37m-manylinux2014_x86_64.whl", hash =
"sha256:a0507f49113d98eea24903b5ef384e631997ebfd4934b94ce13c15fbb0adb0c4"},
- {file = "ray-2.6.2-cp38-cp38-macosx_10_15_x86_64.whl", hash =
"sha256:ff3d5ea1057c4d57bae9b921df0bdd98263c91d0fe7a4221a179bb9034005795"},
- {file = "ray-2.6.2-cp38-cp38-macosx_11_0_arm64.whl", hash =
"sha256:302213def2b0d9fd2039d14b992a2dffa5b4db36ac8d154a219d8bd8e580d694"},
- {file = "ray-2.6.2-cp38-cp38-manylinux2014_aarch64.whl", hash =
"sha256:8f34ddd7012b5908d19a12e452138e941d83e38a1fbce2db4545c281957bbda5"},
- {file = "ray-2.6.2-cp38-cp38-manylinux2014_x86_64.whl", hash =
"sha256:9a4cdabff16caaed76e9b7f2a9d94cfbae30073b6dc8956f55d70b99e218d1dc"},
- {file = "ray-2.6.2-cp39-cp39-macosx_10_15_x86_64.whl", hash =
"sha256:86b57fb864a9328971d7e554e0c55608735e94434f13948a58bb07423c783a39"},
- {file = "ray-2.6.2-cp39-cp39-macosx_11_0_arm64.whl", hash =
"sha256:21608b9dbc19175fbb1832ec9296e7c93cf416d28b0e21ee7e2667da7af952b7"},
- {file = "ray-2.6.2-cp39-cp39-manylinux2014_aarch64.whl", hash =
"sha256:4ab10849705f41923ec1ccf597f3881c9f3f304c43da3e6f5b794c2072694f77"},
- {file = "ray-2.6.2-cp39-cp39-manylinux2014_x86_64.whl", hash =
"sha256:3f359fdb88406528c564e66dbe848ab98679ddb046d44465ebd3933fa1ecdd62"},
+ {file = "ray-2.6.3-cp310-cp310-macosx_10_15_x86_64.whl", hash =
"sha256:8a3cde58dba07da7a62e1f804b3dae5b29de3be052e02e4559bff7e7cb4d4a3b"},
+ {file = "ray-2.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash =
"sha256:81e2ee7252e2fbfb05a24124774a8de563daa261200a08d9cbc6b499f7262af1"},
+ {file = "ray-2.6.3-cp310-cp310-manylinux2014_aarch64.whl", hash =
"sha256:485e4cd46a569416a14a72c06fe7901b0e3902f3023100b375c477975824e707"},
+ {file = "ray-2.6.3-cp310-cp310-manylinux2014_x86_64.whl", hash =
"sha256:abc6a537454506a5fa87137de058d12aeea38da7077aae6f0ebf6199e5f5b2a1"},
+ {file = "ray-2.6.3-cp310-cp310-win_amd64.whl", hash =
"sha256:787ec7f43f5b3ed85728cf4878bdfed0a334d9108b6af75ef3fe5c8d44a7f74d"},
+ {file = "ray-2.6.3-cp311-cp311-macosx_10_15_x86_64.whl", hash =
"sha256:bdeacaafcbb97e5f1c3c3349e7fcc0c40f691cea2bf057027c5491ea1ac929b0"},
+ {file = "ray-2.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash =
"sha256:4b4600c93e2e94b6ca75ef4b4cb92d7f98d4be5484273d6fbac4218fb82cf96f"},
+ {file = "ray-2.6.3-cp311-cp311-manylinux2014_aarch64.whl", hash =
"sha256:0a5870f9a16cb94080d770f83326d7e2163d88d75be240273cef4b932a071bb2"},
+ {file = "ray-2.6.3-cp311-cp311-manylinux2014_x86_64.whl", hash =
"sha256:1a8de31a9a4049134cf7e97b725a4078c958a964d091cb3e812e31eddd013bd7"},
+ {file = "ray-2.6.3-cp311-cp311-win_amd64.whl", hash =
"sha256:56b920a1814decdd20a754b7c5048770684d6d3d242c83aa99da5d3e8c339f13"},
+ {file = "ray-2.6.3-cp37-cp37m-macosx_10_15_x86_64.whl", hash =
"sha256:b358fd112876c3a249fd8cffbf20b26622817c78b2ade0a725a7036c693f8d70"},
+ {file = "ray-2.6.3-cp37-cp37m-manylinux2014_aarch64.whl", hash =
"sha256:467b9aa63f09d20e3985457816d703fe27ea388cdcaa88ff5eff222f8074a05c"},
+ {file = "ray-2.6.3-cp37-cp37m-manylinux2014_x86_64.whl", hash =
"sha256:dff21468d621c8dac95b3df320e6c6121f6618f6827243fd75a057c8815c2498"},
+ {file = "ray-2.6.3-cp37-cp37m-win_amd64.whl", hash =
"sha256:e0f8eaf4c4592335722dad474685c2ffc98207b997e47a24b297a60db389a4cb"},
+ {file = "ray-2.6.3-cp38-cp38-macosx_10_15_x86_64.whl", hash =
"sha256:31f1dd05130e712b9b64ccad9e6eaa82c715bb25a0a45ffd48ebf4953f6fe347"},
+ {file = "ray-2.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash =
"sha256:90b780e131f891185f9de2b9c08d1f2d729e5755c7389a1ddaa6f796fae0d787"},
+ {file = "ray-2.6.3-cp38-cp38-manylinux2014_aarch64.whl", hash =
"sha256:3e5a4bbc29268a64bd2a8d48ed60f32a5bcce285a2a4f4339174947733449e37"},
+ {file = "ray-2.6.3-cp38-cp38-manylinux2014_x86_64.whl", hash =
"sha256:a182a80aebf863b5d4e875bed0a80e83200e84f4f63c4126cef87cc01e43f067"},
+ {file = "ray-2.6.3-cp38-cp38-win_amd64.whl", hash =
"sha256:015a2aa30aba0719d20cdf8fa32c689b68016678cb20f46bd1df8b227c938b84"},
+ {file = "ray-2.6.3-cp39-cp39-macosx_10_15_x86_64.whl", hash =
"sha256:3ccf809e5948333c1c8c81694514b5900259e79cbdc8bddd3680695820cafcf2"},
+ {file = "ray-2.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash =
"sha256:a4ef2f52319286720be7f3bfe6043e9fd0b8cb7826cb2ffc90c23c1c42427464"},
+ {file = "ray-2.6.3-cp39-cp39-manylinux2014_aarch64.whl", hash =
"sha256:5923849ec0854ab3e5ca8873d47ed7e11074e1213a3c40f8864c9500de034313"},
+ {file = "ray-2.6.3-cp39-cp39-manylinux2014_x86_64.whl", hash =
"sha256:18d033cc468e5171d9995476c33f99a5b79f091c34265c7e9f3d8b1c9042437e"},
+ {file = "ray-2.6.3-cp39-cp39-win_amd64.whl", hash =
"sha256:bca66c8e8163f06dc5443623e7b221660529a39574a589ba9257f2188ea8bf6b"},
]
[package.dependencies]
@@ -2672,9 +2771,9 @@ requests = "*"
[package.extras]
air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi",
"fsspec", "gpustat (>=1.0.0)", "numpy (>=1.20)", "opencensus", "pandas",
"pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow
(>=6.0.1)", "pydantic (<2)", "requests", "smart-open", "starlette",
"tensorboardX (>=1.9)", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)"]
-all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree",
"fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "gymnasium
(==0.26.3)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api",
"opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)",
"prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)",
"pydantic (<2)", "pyyaml", "ray-cpp (==2.6.2)", "requests", "rich",
"scikit-image", "scipy", "smart-open", "starlet [...]
+all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree",
"fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "gymnasium
(==0.26.3)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api",
"opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)",
"prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)",
"pydantic (<2)", "pyyaml", "ray-cpp (==2.6.3)", "requests", "rich",
"scikit-image", "scipy", "smart-open", "starlet [...]
client = ["grpcio (!=1.56.0)"]
-cpp = ["ray-cpp (==2.6.2)"]
+cpp = ["ray-cpp (==2.6.3)"]
data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"]
default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)",
"opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic
(<2)", "requests", "smart-open", "virtualenv (>=20.0.24,<20.21.1)"]
observability = ["opentelemetry-api", "opentelemetry-exporter-otlp",
"opentelemetry-sdk"]
@@ -3470,4 +3569,4 @@ zstandard = ["zstandard"]
[metadata]
lock-version = "2.0"
python-versions = "^3.8"
-content-hash =
"e547ce429123b5ec2bd378b2a61fb8e5a9650a041faf32367109d4855dedbb81"
+content-hash =
"0fa00a57034a7fbcf2aec0d81f69b6c2d823440081e5d746f11585a221a272fa"
diff --git a/python/pyiceberg/catalog/__init__.py
b/python/pyiceberg/catalog/__init__.py
index bc42e5ab09..ff0d5095c8 100644
--- a/python/pyiceberg/catalog/__init__.py
+++ b/python/pyiceberg/catalog/__init__.py
@@ -537,7 +537,7 @@ class Catalog(ABC):
io = load_file_io(self.properties, table.metadata_location)
metadata = table.metadata
manifest_lists_to_delete = set()
- manifests_to_delete = []
+ manifests_to_delete: List[ManifestFile] = []
for snapshot in metadata.snapshots:
manifests_to_delete += snapshot.manifests(io)
if snapshot.manifest_list is not None:
diff --git a/python/pyiceberg/catalog/rest.py b/python/pyiceberg/catalog/rest.py
index fcf55bb830..13cea9b1c8 100644
--- a/python/pyiceberg/catalog/rest.py
+++ b/python/pyiceberg/catalog/rest.py
@@ -177,8 +177,8 @@ class OAuthErrorResponse(IcebergBaseModel):
error: Literal[
"invalid_request", "invalid_client", "invalid_grant",
"unauthorized_client", "unsupported_grant_type", "invalid_scope"
]
- error_description: Optional[str]
- error_uri: Optional[str]
+ error_description: Optional[str] = None
+ error_uri: Optional[str] = None
class RestCatalog(Catalog):
@@ -430,7 +430,7 @@ class RestCatalog(Catalog):
write_order=sort_order,
properties=properties,
)
- serialized_json = request.json()
+ serialized_json = request.model_dump_json().encode("utf-8")
response = self._session.post(
self.url(Endpoints.create_table,
namespace=namespace_and_table["namespace"]),
data=serialized_json,
@@ -507,7 +507,7 @@ class RestCatalog(Catalog):
"""
response = self._session.post(
self.url(Endpoints.update_table, prefixed=True,
**self._split_identifier_for_path(table_request.identifier)),
- data=table_request.json(),
+ data=table_request.model_dump_json().encode("utf-8"),
)
try:
response.raise_for_status()
diff --git a/python/pyiceberg/cli/output.py b/python/pyiceberg/cli/output.py
index aa01fb49f7..299f84dafe 100644
--- a/python/pyiceberg/cli/output.py
+++ b/python/pyiceberg/cli/output.py
@@ -200,7 +200,11 @@ class JsonOutput(Output):
metadata_location: str
metadata: TableMetadata
- print(FauxTable(identifier=table.identifier, metadata=table.metadata,
metadata_location=table.metadata_location).json())
+ print(
+ FauxTable(
+ identifier=table.identifier, metadata=table.metadata,
metadata_location=table.metadata_location
+ ).model_dump_json()
+ )
def describe_properties(self, properties: Properties) -> None:
self._out(properties)
@@ -209,13 +213,13 @@ class JsonOutput(Output):
print(json.dumps(response))
def schema(self, schema: Schema) -> None:
- print(schema.json())
+ print(schema.model_dump_json())
def files(self, table: Table, history: bool) -> None:
pass
def spec(self, spec: PartitionSpec) -> None:
- print(spec.json())
+ print(spec.model_dump_json())
def uuid(self, uuid: Optional[UUID]) -> None:
self._out({"uuid": str(uuid) if uuid else "missing"})
diff --git a/python/pyiceberg/io/pyarrow.py b/python/pyiceberg/io/pyarrow.py
index 2e33f7174c..296a78903e 100644
--- a/python/pyiceberg/io/pyarrow.py
+++ b/python/pyiceberg/io/pyarrow.py
@@ -783,7 +783,7 @@ def _task_to_table(
schema_raw = metadata.get(ICEBERG_SCHEMA)
# TODO: if field_ids are not present, Name Mapping should be
implemented to look them up in the table schema,
# see https://github.com/apache/iceberg/issues/7451
- file_schema = Schema.parse_raw(schema_raw) if schema_raw is not None
else pyarrow_to_schema(physical_schema)
+ file_schema = Schema.model_validate_json(schema_raw) if schema_raw is
not None else pyarrow_to_schema(physical_schema)
pyarrow_filter = None
if bound_row_filter is not AlwaysTrue():
diff --git a/python/pyiceberg/partitioning.py b/python/pyiceberg/partitioning.py
index 935dbea68f..bffb2c2f9b 100644
--- a/python/pyiceberg/partitioning.py
+++ b/python/pyiceberg/partitioning.py
@@ -14,6 +14,8 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+from __future__ import annotations
+
from functools import cached_property
from typing import (
Any,
@@ -23,15 +25,21 @@ from typing import (
Tuple,
)
-from pydantic import Field
+from pydantic import (
+ BeforeValidator,
+ Field,
+ PlainSerializer,
+ WithJsonSchema,
+)
+from typing_extensions import Annotated
from pyiceberg.schema import Schema
-from pyiceberg.transforms import Transform
+from pyiceberg.transforms import Transform, parse_transform
from pyiceberg.typedef import IcebergBaseModel
from pyiceberg.types import NestedField, StructType
INITIAL_PARTITION_SPEC_ID = 0
-_PARTITION_DATA_ID_START: int = 1000
+PARTITION_FIELD_ID_START: int = 1000
class PartitionField(IcebergBaseModel):
@@ -46,7 +54,12 @@ class PartitionField(IcebergBaseModel):
source_id: int = Field(alias="source-id")
field_id: int = Field(alias="field-id")
- transform: Transform[Any, Any] = Field()
+ transform: Annotated[ # type: ignore
+ Transform,
+ BeforeValidator(parse_transform),
+ PlainSerializer(lambda c: str(c), return_type=str), # pylint:
disable=W0108
+ WithJsonSchema({"type": "string"}, mode="serialization"),
+ ] = Field()
name: str = Field()
def __init__(
@@ -65,6 +78,7 @@ class PartitionField(IcebergBaseModel):
data["transform"] = transform
if name is not None:
data["name"] = name
+
super().__init__(**data)
def __str__(self) -> str:
@@ -82,7 +96,7 @@ class PartitionSpec(IcebergBaseModel):
"""
spec_id: int = Field(alias="spec-id", default=INITIAL_PARTITION_SPEC_ID)
- fields: Tuple[PartitionField, ...] = Field(alias="fields",
default_factory=tuple)
+ fields: Tuple[PartitionField, ...] = Field(default_factory=tuple)
def __init__(
self,
@@ -129,7 +143,7 @@ class PartitionSpec(IcebergBaseModel):
def last_assigned_field_id(self) -> int:
if self.fields:
return max(pf.field_id for pf in self.fields)
- return _PARTITION_DATA_ID_START
+ return PARTITION_FIELD_ID_START
@cached_property
def source_id_to_fields_map(self) -> Dict[int, List[PartitionField]]:
@@ -143,7 +157,7 @@ class PartitionSpec(IcebergBaseModel):
def fields_by_source_id(self, field_id: int) -> List[PartitionField]:
return self.source_id_to_fields_map.get(field_id, [])
- def compatible_with(self, other: "PartitionSpec") -> bool:
+ def compatible_with(self, other: PartitionSpec) -> bool:
"""Produce a boolean to return True if two PartitionSpec are
considered compatible."""
if self == other:
return True
@@ -196,7 +210,7 @@ def assign_fresh_partition_spec_ids(spec: PartitionSpec,
old_schema: Schema, fre
PartitionField(
name=field.name,
source_id=fresh_field.field_id,
- field_id=_PARTITION_DATA_ID_START + pos,
+ field_id=PARTITION_FIELD_ID_START + pos,
transform=field.transform,
)
)
diff --git a/python/pyiceberg/serializers.py b/python/pyiceberg/serializers.py
index 440b265827..794d5364f5 100644
--- a/python/pyiceberg/serializers.py
+++ b/python/pyiceberg/serializers.py
@@ -126,6 +126,6 @@ class ToOutputFile:
overwrite (bool): Where to overwrite the file if it already
exists. Defaults to `False`.
"""
with output_file.create(overwrite=overwrite) as output_stream:
- json_bytes = metadata.json().encode("utf-8")
+ json_bytes = metadata.model_dump_json().encode("utf-8")
json_bytes =
Compressor.get_compressor(output_file.location).bytes_compressor()(json_bytes)
output_stream.write(json_bytes)
diff --git a/python/pyiceberg/table/__init__.py
b/python/pyiceberg/table/__init__.py
index 4ede2582fc..52479c29ca 100644
--- a/python/pyiceberg/table/__init__.py
+++ b/python/pyiceberg/table/__init__.py
@@ -36,7 +36,7 @@ from typing import (
Union,
)
-from pydantic import Field
+from pydantic import Field, SerializeAsAny
from sortedcontainers import SortedList
from pyiceberg.expressions import (
@@ -365,8 +365,8 @@ class AssertDefaultSortOrderId(TableRequirement):
class CommitTableRequest(IcebergBaseModel):
identifier: Identifier = Field()
- requirements: List[TableRequirement] = Field(default_factory=list)
- updates: List[TableUpdate] = Field(default_factory=list)
+ requirements: List[SerializeAsAny[TableRequirement]] =
Field(default_factory=list)
+ updates: List[SerializeAsAny[TableUpdate]] = Field(default_factory=list)
class CommitTableResponse(IcebergBaseModel):
diff --git a/python/pyiceberg/table/metadata.py
b/python/pyiceberg/table/metadata.py
index b5da6413f4..e6a3e6f16e 100644
--- a/python/pyiceberg/table/metadata.py
+++ b/python/pyiceberg/table/metadata.py
@@ -14,6 +14,8 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
+from __future__ import annotations
+
import datetime
import uuid
from copy import copy
@@ -28,11 +30,11 @@ from typing import (
from pydantic import Field
from pydantic import ValidationError as PydanticValidationError
-from pydantic import root_validator
+from pydantic import model_validator
from typing_extensions import Annotated
from pyiceberg.exceptions import ValidationError
-from pyiceberg.partitioning import PartitionSpec,
assign_fresh_partition_spec_ids
+from pyiceberg.partitioning import PARTITION_FIELD_ID_START, PartitionSpec,
assign_fresh_partition_spec_ids
from pyiceberg.schema import Schema, assign_fresh_schema_ids
from pyiceberg.table.refs import MAIN_BRANCH, SnapshotRef, SnapshotRefType
from pyiceberg.table.snapshots import MetadataLogEntry, Snapshot,
SnapshotLogEntry
@@ -42,58 +44,87 @@ from pyiceberg.table.sorting import (
SortOrder,
assign_fresh_sort_order_ids,
)
-from pyiceberg.typedef import EMPTY_DICT, IcebergBaseModel, Properties
+from pyiceberg.typedef import (
+ EMPTY_DICT,
+ IcebergBaseModel,
+ IcebergRootModel,
+ Properties,
+)
from pyiceberg.utils.datetime import datetime_to_millis
-CURRENT_SNAPSHOT_ID = "current_snapshot_id"
-CURRENT_SCHEMA_ID = "current_schema_id"
+CURRENT_SNAPSHOT_ID = "current-snapshot-id"
+CURRENT_SCHEMA_ID = "current-schema-id"
SCHEMAS = "schemas"
-DEFAULT_SPEC_ID = "default_spec_id"
-PARTITION_SPEC = "partition_spec"
-PARTITION_SPECS = "partition_specs"
-SORT_ORDERS = "sort_orders"
+DEFAULT_SPEC_ID = "default-spec-id"
+PARTITION_SPEC = "partition-spec"
+PARTITION_SPECS = "partition-specs"
+SORT_ORDERS = "sort-orders"
+LAST_PARTITION_ID = "last-partition-id"
+LAST_ASSIGNED_FIELD_ID = "last-assigned-field-id"
REFS = "refs"
+SPEC_ID = "spec-id"
+FIELD_ID = "field-id"
+FIELDS = "fields"
INITIAL_SEQUENCE_NUMBER = 0
INITIAL_SPEC_ID = 0
DEFAULT_SCHEMA_ID = 0
-def check_schemas(values: Dict[str, Any]) -> Dict[str, Any]:
+def cleanup_snapshot_id(data: Dict[str, Any]) -> Dict[str, Any]:
+ """Runs before validation."""
+ if CURRENT_SNAPSHOT_ID in data and data[CURRENT_SNAPSHOT_ID] == -1:
+ # We treat -1 and None the same, by cleaning this up
+ # in a pre-validator, we can simplify the logic later on
+ data[CURRENT_SNAPSHOT_ID] = None
+ return data
+
+
+def check_schemas(table_metadata: TableMetadata) -> TableMetadata:
"""Validator to check if the current-schema-id is actually present in
schemas."""
- current_schema_id = values[CURRENT_SCHEMA_ID]
+ current_schema_id = table_metadata.current_schema_id
- for schema in values[SCHEMAS]:
+ for schema in table_metadata.schemas:
if schema.schema_id == current_schema_id:
- return values
+ return table_metadata
raise ValidationError(f"current-schema-id {current_schema_id} can't be
found in the schemas")
-def check_partition_specs(values: Dict[str, Any]) -> Dict[str, Any]:
+def check_partition_specs(table_metadata: TableMetadata) -> TableMetadata:
"""Validator to check if the default-spec-id is present in
partition-specs."""
- default_spec_id = values["default_spec_id"]
+ default_spec_id = table_metadata.default_spec_id
- partition_specs: List[PartitionSpec] = values[PARTITION_SPECS]
+ partition_specs: List[PartitionSpec] = table_metadata.partition_specs
for spec in partition_specs:
if spec.spec_id == default_spec_id:
- return values
+ return table_metadata
raise ValidationError(f"default-spec-id {default_spec_id} can't be found")
-def check_sort_orders(values: Dict[str, Any]) -> Dict[str, Any]:
+def check_sort_orders(table_metadata: TableMetadata) -> TableMetadata:
"""Validator to check if the default_sort_order_id is present in
sort-orders."""
- default_sort_order_id: int = values["default_sort_order_id"]
+ default_sort_order_id: int = table_metadata.default_sort_order_id
if default_sort_order_id != UNSORTED_SORT_ORDER_ID:
- sort_orders: List[SortOrder] = values[SORT_ORDERS]
+ sort_orders: List[SortOrder] = table_metadata.sort_orders
for sort_order in sort_orders:
if sort_order.order_id == default_sort_order_id:
- return values
+ return table_metadata
raise ValidationError(f"default-sort-order-id {default_sort_order_id}
can't be found in {sort_orders}")
- return values
+ return table_metadata
+
+
+def construct_refs(table_metadata: TableMetadata) -> TableMetadata:
+ """Sets the main branch if missing."""
+ if table_metadata.current_snapshot_id is not None:
+ if MAIN_BRANCH not in table_metadata.refs:
+ table_metadata.refs[MAIN_BRANCH] = SnapshotRef(
+ snapshot_id=table_metadata.current_snapshot_id,
snapshot_ref_type=SnapshotRefType.BRANCH
+ )
+ return table_metadata
class TableMetadataCommonFields(IcebergBaseModel):
@@ -102,22 +133,6 @@ class TableMetadataCommonFields(IcebergBaseModel):
https://iceberg.apache.org/spec/#iceberg-table-spec
"""
- @root_validator(skip_on_failure=True)
- def cleanup_snapshot_id(cls, data: Dict[str, Any]) -> Dict[str, Any]:
- if data[CURRENT_SNAPSHOT_ID] == -1:
- # We treat -1 and None the same, by cleaning this up
- # in a pre-validator, we can simplify the logic later on
- data[CURRENT_SNAPSHOT_ID] = None
- return data
-
- @root_validator(skip_on_failure=True)
- def construct_refs(cls, data: Dict[str, Any]) -> Dict[str, Any]:
- # This is going to be much nicer as soon as refs is an actual pydantic
object
- if current_snapshot_id := data.get(CURRENT_SNAPSHOT_ID):
- if MAIN_BRANCH not in data[REFS]:
- data[REFS][MAIN_BRANCH] =
SnapshotRef(snapshot_id=current_snapshot_id,
snapshot_ref_type=SnapshotRefType.BRANCH)
- return data
-
location: str = Field()
"""The table’s base location. This is used by writers to determine where
to store data files, manifest files, and table metadata files."""
@@ -217,7 +232,15 @@ class TableMetadataV1(TableMetadataCommonFields,
IcebergBaseModel):
# because bumping the version should be an explicit operation that is up
# to the owner of the table.
- @root_validator
+ @model_validator(mode="before")
+ def cleanup_snapshot_id(cls, data: Dict[str, Any]) -> Dict[str, Any]:
+ return cleanup_snapshot_id(data)
+
+ @model_validator(mode="after")
+ def construct_refs(cls, data: TableMetadataV1) -> TableMetadataV1:
+ return construct_refs(data)
+
+ @model_validator(mode="before")
def set_v2_compatible_defaults(cls, data: Dict[str, Any]) -> Dict[str,
Any]:
"""Sets default values to be compatible with the format v2.
@@ -233,7 +256,7 @@ class TableMetadataV1(TableMetadataCommonFields,
IcebergBaseModel):
return data
- @root_validator(skip_on_failure=True)
+ @model_validator(mode="before")
def construct_schemas(cls, data: Dict[str, Any]) -> Dict[str, Any]:
"""Converts the schema into schemas.
@@ -248,13 +271,11 @@ class TableMetadataV1(TableMetadataCommonFields,
IcebergBaseModel):
The TableMetadata with the schemas set, if not provided.
"""
if not data.get("schemas"):
- schema = data["schema_"]
+ schema = data["schema"]
data["schemas"] = [schema]
- else:
- check_schemas(data)
return data
- @root_validator(skip_on_failure=True)
+ @model_validator(mode="before")
def construct_partition_specs(cls, data: Dict[str, Any]) -> Dict[str, Any]:
"""Converts the partition_spec into partition_specs.
@@ -269,20 +290,21 @@ class TableMetadataV1(TableMetadataCommonFields,
IcebergBaseModel):
The TableMetadata with the partition_specs set, if not provided.
"""
if not data.get(PARTITION_SPECS):
- fields = data[PARTITION_SPEC]
- migrated_spec = PartitionSpec(*fields)
- data[PARTITION_SPECS] = [migrated_spec]
- data[DEFAULT_SPEC_ID] = migrated_spec.spec_id
- else:
- check_partition_specs(data)
-
- if "last_partition_id" not in data or data.get("last_partition_id") is
None:
- if partition_specs := data.get(PARTITION_SPECS):
- data["last_partition_id"] = max(spec.last_assigned_field_id
for spec in partition_specs)
+ if data.get(PARTITION_SPEC) is not None:
+ # Promote the spec from partition-spec to partition-specs
+ fields = data[PARTITION_SPEC]
+ data[PARTITION_SPECS] = [{SPEC_ID: INITIAL_SPEC_ID, FIELDS:
fields}]
+ data[DEFAULT_SPEC_ID] = INITIAL_SPEC_ID
+ else:
+ data[PARTITION_SPECS] = [{"field-id": 0, "fields": ()}]
+
+ data[LAST_PARTITION_ID] = max(
+ [field.get(FIELD_ID) for spec in data[PARTITION_SPECS] for field
in spec[FIELDS]], default=PARTITION_FIELD_ID_START
+ )
return data
- @root_validator(skip_on_failure=True)
+ @model_validator(mode="before")
def set_sort_orders(cls, data: Dict[str, Any]) -> Dict[str, Any]:
"""Sets the sort_orders if not provided.
@@ -297,14 +319,12 @@ class TableMetadataV1(TableMetadataCommonFields,
IcebergBaseModel):
"""
if not data.get(SORT_ORDERS):
data[SORT_ORDERS] = [UNSORTED_SORT_ORDER]
- else:
- check_sort_orders(data)
return data
- def to_v2(self) -> "TableMetadataV2":
- metadata = copy(self.dict())
- metadata["format_version"] = 2
- return TableMetadataV2(**metadata)
+ def to_v2(self) -> TableMetadataV2:
+ metadata = copy(self.model_dump())
+ metadata["format-version"] = 2
+ return TableMetadataV2.model_validate(metadata)
format_version: Literal[1] = Field(alias="format-version")
"""An integer version number for the format. Currently, this can be 1 or 2
@@ -334,17 +354,25 @@ class TableMetadataV2(TableMetadataCommonFields,
IcebergBaseModel):
https://iceberg.apache.org/spec/#version-2-row-level-deletes
"""
- @root_validator(skip_on_failure=True)
- def check_schemas(cls, values: Dict[str, Any]) -> Dict[str, Any]:
- return check_schemas(values)
+ @model_validator(mode="before")
+ def cleanup_snapshot_id(cls, data: Dict[str, Any]) -> Dict[str, Any]:
+ return cleanup_snapshot_id(data)
+
+ @model_validator(mode="after")
+ def check_schemas(cls, table_metadata: TableMetadata) -> TableMetadata:
+ return check_schemas(table_metadata)
+
+ @model_validator(mode="after")
+ def check_partition_specs(cls, table_metadata: TableMetadata) ->
TableMetadata:
+ return check_partition_specs(table_metadata)
- @root_validator
- def check_partition_specs(cls, values: Dict[str, Any]) -> Dict[str, Any]:
- return check_partition_specs(values)
+ @model_validator(mode="after")
+ def check_sort_orders(cls, table_metadata: TableMetadata) -> TableMetadata:
+ return check_sort_orders(table_metadata)
- @root_validator(skip_on_failure=True)
- def check_sort_orders(cls, values: Dict[str, Any]) -> Dict[str, Any]:
- return check_sort_orders(values)
+ @model_validator(mode="after")
+ def construct_refs(cls, table_metadata: TableMetadata) -> TableMetadata:
+ return construct_refs(table_metadata)
format_version: Literal[2] = Field(alias="format-version", default=2)
"""An integer version number for the format. Currently, this can be 1 or 2
@@ -359,15 +387,6 @@ class TableMetadataV2(TableMetadataCommonFields,
IcebergBaseModel):
TableMetadata = Annotated[Union[TableMetadataV1, TableMetadataV2],
Field(discriminator="format_version")]
-class TableMetadataFactory(IcebergBaseModel):
- table_metadata: TableMetadata
-
- @classmethod
- def parse_data(cls, data: str) -> "TableMetadataFactory":
- labeled_data = f'{{"table_metadata": {data}}}'
- return cls.parse_raw(labeled_data)
-
-
def new_table_metadata(
schema: Schema, partition_spec: PartitionSpec, sort_order: SortOrder,
location: str, properties: Properties = EMPTY_DICT
) -> TableMetadata:
@@ -389,14 +408,17 @@ def new_table_metadata(
)
+class TableMetadataWrapper(IcebergRootModel[TableMetadata]):
+ root: TableMetadata
+
+
class TableMetadataUtil:
"""Helper class for parsing TableMetadata."""
@staticmethod
def parse_raw(data: str) -> TableMetadata:
try:
- table_metadata_factory = TableMetadataFactory.parse_data(data)
- return table_metadata_factory.table_metadata
+ return TableMetadataWrapper.model_validate_json(data).root
except PydanticValidationError as e:
raise ValidationError(e) from e
@@ -412,3 +434,6 @@ class TableMetadataUtil:
return TableMetadataV2(**data)
else:
raise ValidationError(f"Unknown format version: {format_version}")
+
+
+TableMetadata = Annotated[Union[TableMetadataV1, TableMetadataV2],
Field(discriminator="format_version")] # type: ignore
diff --git a/python/pyiceberg/table/snapshots.py
b/python/pyiceberg/table/snapshots.py
index b3d90eebb7..c93986c0b8 100644
--- a/python/pyiceberg/table/snapshots.py
+++ b/python/pyiceberg/table/snapshots.py
@@ -20,10 +20,9 @@ from typing import (
Dict,
List,
Optional,
- Union,
)
-from pydantic import Field, PrivateAttr, root_validator
+from pydantic import Field, PrivateAttr, model_serializer
from pyiceberg.io import FileIO
from pyiceberg.manifest import ManifestFile, read_manifest_list
@@ -59,34 +58,19 @@ class Summary(IcebergBaseModel):
like snapshot expiration, to skip processing certain snapshots.
"""
- __root__: Dict[str, Union[str, Operation]]
+ operation: Operation = Field()
_additional_properties: Dict[str, str] = PrivateAttr()
- @root_validator
- def check_operation(cls, values: Dict[str, Dict[str, Union[str,
Operation]]]) -> Dict[str, Dict[str, Union[str, Operation]]]:
- if operation := values["__root__"].get(OPERATION):
- if isinstance(operation, str):
- values["__root__"][OPERATION] = Operation(operation.lower())
- else:
- raise ValueError("Operation not set")
- return values
-
- def __init__(
- self, operation: Optional[Operation] = None, __root__:
Optional[Dict[str, Union[str, Operation]]] = None, **data: Any
- ) -> None:
- super().__init__(__root__={"operation": operation, **data} if not
__root__ else __root__)
- self._additional_properties = {
- k: v for k, v in self.__root__.items() if k != OPERATION # type:
ignore # We know that they are all string, and we don't want to check
- }
+ def __init__(self, operation: Operation, **data: Any) -> None:
+ super().__init__(operation=operation, **data)
+ self._additional_properties = data
- @property
- def operation(self) -> Operation:
- operation = self.__root__[OPERATION]
- if isinstance(operation, Operation):
- return operation
- else:
- # Should never happen
- raise ValueError(f"Unknown type of operation: {operation}")
+ @model_serializer
+ def ser_model(self) -> Dict[str, str]:
+ return {
+ "operation": str(self.operation.value),
+ **self._additional_properties,
+ }
@property
def additional_properties(self) -> Dict[str, str]:
diff --git a/python/pyiceberg/table/sorting.py
b/python/pyiceberg/table/sorting.py
index 3fbc82465f..a044774cb7 100644
--- a/python/pyiceberg/table/sorting.py
+++ b/python/pyiceberg/table/sorting.py
@@ -25,10 +25,17 @@ from typing import (
Union,
)
-from pydantic import Field, root_validator
+from pydantic import (
+ BeforeValidator,
+ Field,
+ PlainSerializer,
+ WithJsonSchema,
+ model_validator,
+)
+from typing_extensions import Annotated
from pyiceberg.schema import Schema
-from pyiceberg.transforms import IdentityTransform, Transform
+from pyiceberg.transforms import IdentityTransform, Transform, parse_transform
from pyiceberg.typedef import IcebergBaseModel
from pyiceberg.types import IcebergType
@@ -88,7 +95,7 @@ class SortField(IcebergBaseModel):
data["null-order"] = null_order
super().__init__(**data)
- @root_validator(pre=True)
+ @model_validator(mode="before")
def set_null_order(cls, values: Dict[str, Any]) -> Dict[str, Any]:
values["direction"] = values["direction"] if values.get("direction")
else SortDirection.ASC
if not values.get("null-order"):
@@ -96,7 +103,12 @@ class SortField(IcebergBaseModel):
return values
source_id: int = Field(alias="source-id")
- transform: Transform[Any, Any] = Field()
+ transform: Annotated[ # type: ignore
+ Transform,
+ BeforeValidator(parse_transform),
+ PlainSerializer(lambda c: str(c), return_type=str), # pylint:
disable=W0108
+ WithJsonSchema({"type": "string"}, mode="serialization"),
+ ] = Field()
direction: SortDirection = Field()
null_order: NullOrder = Field(alias="null-order")
diff --git a/python/pyiceberg/transforms.py b/python/pyiceberg/transforms.py
index 3e90c911d1..fc9b8b2c89 100644
--- a/python/pyiceberg/transforms.py
+++ b/python/pyiceberg/transforms.py
@@ -20,19 +20,13 @@ import struct
from abc import ABC, abstractmethod
from enum import IntEnum
from functools import singledispatch
-from typing import (
- Any,
- Callable,
- Generator,
- Generic,
-)
+from typing import Any, Callable, Generic
from typing import Literal as LiteralType
from typing import Optional, TypeVar
from uuid import UUID
import mmh3
from pydantic import Field, PositiveInt, PrivateAttr
-from pydantic.typing import AnyCallable
from pyiceberg.expressions import (
BoundEqualTo,
@@ -65,7 +59,7 @@ from pyiceberg.expressions.literals import (
TimestampLiteral,
literal,
)
-from pyiceberg.typedef import IcebergBaseModel, L
+from pyiceberg.typedef import IcebergRootModel, L
from pyiceberg.types import (
BinaryType,
DateType,
@@ -106,47 +100,37 @@ def _transform_literal(func: Callable[[L], L], lit:
Literal[L]) -> Literal[L]:
return literal(func(lit.value))
-class Transform(IcebergBaseModel, ABC, Generic[S, T]):
+def parse_transform(v: Any) -> Any:
+ if isinstance(v, str):
+ if v == IDENTITY:
+ return IdentityTransform()
+ elif v == VOID:
+ return VoidTransform()
+ elif v.startswith(BUCKET):
+ return BucketTransform(num_buckets=BUCKET_PARSER.match(v))
+ elif v.startswith(TRUNCATE):
+ return TruncateTransform(width=TRUNCATE_PARSER.match(v))
+ elif v == YEAR:
+ return YearTransform()
+ elif v == MONTH:
+ return MonthTransform()
+ elif v == DAY:
+ return DayTransform()
+ elif v == HOUR:
+ return HourTransform()
+ else:
+ return UnknownTransform(transform=v)
+ return v
+
+
+class Transform(IcebergRootModel[str], ABC, Generic[S, T]):
"""Transform base class for concrete transforms.
A base class to transform values and project predicates on partition
values.
This class is not used directly. Instead, use one of module method to
create the child classes.
"""
- __root__: str = Field()
-
- @classmethod
- def __get_validators__(cls) -> Generator[AnyCallable, None, None]:
- """Called to validate the input of the Transform class."""
- # one or more validators may be yielded which will be called in the
- # order to validate the input, each validator will receive as an input
- # the value returned from the previous validator
- yield cls.validate
-
- @classmethod
- def validate(cls, v: Any) -> IcebergBaseModel:
- # When Pydantic is unable to determine the subtype
- # In this case we'll help pydantic a bit by parsing the transform type
ourselves
- if isinstance(v, str):
- if v == IDENTITY:
- return IdentityTransform()
- elif v == VOID:
- return VoidTransform()
- elif v.startswith(BUCKET):
- return BucketTransform(num_buckets=BUCKET_PARSER.match(v))
- elif v.startswith(TRUNCATE):
- return TruncateTransform(width=TRUNCATE_PARSER.match(v))
- elif v == YEAR:
- return YearTransform()
- elif v == MONTH:
- return MonthTransform()
- elif v == DAY:
- return DayTransform()
- elif v == HOUR:
- return HourTransform()
- else:
- return UnknownTransform(transform=v)
- return v
+ root: str = Field()
@abstractmethod
def transform(self, source: IcebergType) -> Callable[[Optional[S]],
Optional[T]]:
@@ -180,12 +164,12 @@ class Transform(IcebergBaseModel, ABC, Generic[S, T]):
def __str__(self) -> str:
"""Returns the string representation of the Transform class."""
- return self.__root__
+ return self.root
def __eq__(self, other: Any) -> bool:
"""Returns the equality of two instances of the Transform class."""
if isinstance(other, Transform):
- return self.__root__ == other.__root__
+ return self.root == other.root
return False
@@ -199,11 +183,12 @@ class BucketTransform(Transform[S, int]):
num_buckets (int): The number of buckets.
"""
+ root: str = Field()
_num_buckets: PositiveInt = PrivateAttr()
def __init__(self, num_buckets: int, **data: Any) -> None:
- super().__init__(__root__=f"bucket[{num_buckets}]", **data)
self._num_buckets = num_buckets
+ super().__init__(f"bucket[{num_buckets}]", **data)
@property
def num_buckets(self) -> int:
@@ -296,7 +281,7 @@ class TimeResolution(IntEnum):
SECOND = 0
-class TimeTransform(Transform[S, int], Singleton):
+class TimeTransform(Transform[S, int], Generic[S], Singleton):
@property
@abstractmethod
def granularity(self) -> TimeResolution:
@@ -343,7 +328,7 @@ class YearTransform(TimeTransform[S]):
47
"""
- __root__: LiteralType["year"] = Field(default="year") # noqa: F821
+ root: LiteralType["year"] = Field(default="year") # noqa: F821
def transform(self, source: IcebergType) -> Callable[[Optional[S]],
Optional[int]]:
source_type = type(source)
@@ -390,7 +375,7 @@ class MonthTransform(TimeTransform[S]):
575
"""
- __root__: LiteralType["month"] = Field(default="month") # noqa: F821
+ root: LiteralType["month"] = Field(default="month") # noqa: F821
def transform(self, source: IcebergType) -> Callable[[Optional[S]],
Optional[int]]:
source_type = type(source)
@@ -437,7 +422,7 @@ class DayTransform(TimeTransform[S]):
17501
"""
- __root__: LiteralType["day"] = Field(default="day") # noqa: F821
+ root: LiteralType["day"] = Field(default="day") # noqa: F821
def transform(self, source: IcebergType) -> Callable[[Optional[S]],
Optional[int]]:
source_type = type(source)
@@ -487,7 +472,7 @@ class HourTransform(TimeTransform[S]):
420042
"""
- __root__: LiteralType["hour"] = Field(default="hour") # noqa: F821
+ root: LiteralType["hour"] = Field(default="hour") # noqa: F821
def transform(self, source: IcebergType) -> Callable[[Optional[S]],
Optional[int]]:
if type(source) in {TimestampType, TimestamptzType}:
@@ -532,7 +517,10 @@ class IdentityTransform(Transform[S, S]):
'hello-world'
"""
- __root__: LiteralType["identity"] = Field(default="identity") # noqa: F821
+ root: LiteralType["identity"] = Field(default="identity") # noqa: F821
+
+ def __init__(self) -> None:
+ super().__init__("identity")
def transform(self, source: IcebergType) -> Callable[[Optional[S]],
Optional[S]]:
return lambda v: v
@@ -584,12 +572,12 @@ class TruncateTransform(Transform[S, S]):
ValueError: If a type is provided that is incompatible with a Truncate
transform.
"""
- __root__: str = Field()
+ root: str = Field()
_source_type: IcebergType = PrivateAttr()
_width: PositiveInt = PrivateAttr()
def __init__(self, width: int, **data: Any):
- super().__init__(__root__=f"truncate[{width}]", **data)
+ super().__init__(root=f"truncate[{width}]", **data)
self._width = width
def can_transform(self, source: IcebergType) -> bool:
@@ -725,7 +713,7 @@ class UnknownTransform(Transform[S, T]):
source_type (IcebergType): An Iceberg `Type`.
"""
- __root__: LiteralType["unknown"] = Field(default="unknown") # noqa: F821
+ root: LiteralType["unknown"] = Field(default="unknown") # noqa: F821
_transform: str = PrivateAttr()
def __init__(self, transform: str, **data: Any):
@@ -752,7 +740,7 @@ class UnknownTransform(Transform[S, T]):
class VoidTransform(Transform[S, None], Singleton):
"""A transform that always returns None."""
- __root__ = "void"
+ root: str = "void"
def transform(self, source: IcebergType) -> Callable[[Optional[S]],
Optional[T]]:
return lambda v: None
diff --git a/python/pyiceberg/typedef.py b/python/pyiceberg/typedef.py
index f9931bb344..700553057b 100644
--- a/python/pyiceberg/typedef.py
+++ b/python/pyiceberg/typedef.py
@@ -18,12 +18,13 @@ from __future__ import annotations
from abc import abstractmethod
from decimal import Decimal
-from functools import cached_property, lru_cache
+from functools import lru_cache
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
+ Generic,
List,
Optional,
Protocol,
@@ -35,7 +36,7 @@ from typing import (
)
from uuid import UUID
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict, RootModel
if TYPE_CHECKING:
from pyiceberg.types import StructType
@@ -105,10 +106,7 @@ class IcebergBaseModel(BaseModel):
https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
"""
- class Config:
- keep_untouched = (cached_property,)
- allow_population_by_field_name = True
- frozen = True
+ model_config = ConfigDict(populate_by_name=True, frozen=True)
def _exclude_private_properties(self, exclude: Optional[Set[str]] = None)
-> Set[str]:
# A small trick to exclude private properties. Properties are
serialized by pydantic,
@@ -118,15 +116,42 @@ class IcebergBaseModel(BaseModel):
{field for field in self.__dict__ if field.startswith("_") and not
field == "__root__"}, exclude or set()
)
- def dict(self, exclude_none: bool = True, exclude: Optional[Set[str]] =
None, **kwargs: Any) -> Dict[str, Any]:
- return super().dict(exclude_none=exclude_none,
exclude=self._exclude_private_properties(exclude), **kwargs)
+ def model_dump(
+ self, exclude_none: bool = True, exclude: Optional[Set[str]] = None,
by_alias: bool = True, **kwargs: Any
+ ) -> Dict[str, Any]:
+ return super().model_dump(
+ exclude_none=exclude_none,
exclude=self._exclude_private_properties(exclude), by_alias=by_alias, **kwargs
+ )
- def json(self, exclude_none: bool = True, exclude: Optional[Set[str]] =
None, by_alias: bool = True, **kwargs: Any) -> str:
- return super().json(
+ def model_dump_json(
+ self, exclude_none: bool = True, exclude: Optional[Set[str]] = None,
by_alias: bool = True, **kwargs: Any
+ ) -> str:
+ return super().model_dump_json(
exclude_none=exclude_none,
exclude=self._exclude_private_properties(exclude), by_alias=by_alias, **kwargs
)
+T = TypeVar("T")
+
+
+class IcebergRootModel(RootModel[T], Generic[T]):
+ """
+ This class extends the Pydantic RootModel to set default values by
overriding them.
+
+ This is because we always want to set by_alias to True. In Python, the
dash can't
+ be used in variable names, and this is used throughout the Iceberg spec.
+
+ The same goes for exclude_none: if a field is None, we want to omit it from
+ serialization — for example, the doc attribute on the NestedField object.
+ Default non-null values will be serialized.
+
+ This is recommended by Pydantic:
+
https://pydantic-docs.helpmanual.io/usage/model_config/#change-behaviour-globally
+ """
+
+ model_config = ConfigDict(frozen=True)
+
+
@lru_cache
def _get_struct_fields(struct_type: StructType) -> Tuple[str, ...]:
return tuple([field.name for field in struct_type.fields])
diff --git a/python/pyiceberg/types.py b/python/pyiceberg/types.py
index 6a1888fcf4..95ff1033a1 100644
--- a/python/pyiceberg/types.py
+++ b/python/pyiceberg/types.py
@@ -32,20 +32,26 @@ Notes:
from __future__ import annotations
import re
+from functools import cached_property
from typing import (
Any,
ClassVar,
- Dict,
- Generator,
Literal,
Optional,
Tuple,
)
-from pydantic import Field, PrivateAttr
-from pydantic.typing import AnyCallable
+from pydantic import (
+ Field,
+ PrivateAttr,
+ SerializeAsAny,
+ model_serializer,
+ model_validator,
+)
+from pydantic_core.core_schema import ValidatorFunctionWrapHandler
-from pyiceberg.typedef import IcebergBaseModel
+from pyiceberg.exceptions import ValidationError
+from pyiceberg.typedef import IcebergBaseModel, IcebergRootModel
from pyiceberg.utils.parsing import ParseNumberFromBrackets
from pyiceberg.utils.singleton import Singleton
@@ -54,7 +60,29 @@ FIXED = "fixed"
FIXED_PARSER = ParseNumberFromBrackets(FIXED)
-class IcebergType(IcebergBaseModel, Singleton):
+def _parse_decimal_type(decimal: Any) -> Tuple[int, int]:
+ if isinstance(decimal, str):
+ matches = DECIMAL_REGEX.search(decimal)
+ if matches:
+ return int(matches.group(1)), int(matches.group(2))
+ else:
+ raise ValidationError(f"Could not parse {decimal} into a
DecimalType")
+ elif isinstance(decimal, dict):
+ return decimal["precision"], decimal["scale"]
+ else:
+ return decimal
+
+
+def _parse_fixed_type(fixed: Any) -> int:
+ if isinstance(fixed, str):
+ return FIXED_PARSER.match(fixed)
+ elif isinstance(fixed, dict):
+ return fixed["length"]
+ else:
+ return fixed
+
+
+class IcebergType(IcebergBaseModel):
"""Base type for all Iceberg Types.
Example:
@@ -64,39 +92,54 @@ class IcebergType(IcebergBaseModel, Singleton):
'IcebergType()'
"""
+ @model_validator(mode="wrap")
@classmethod
- def __get_validators__(cls) -> Generator[AnyCallable, None, None]:
- """Called to validate the input of the IcebergType class."""
- # one or more validators may be yielded which will be called in the
- # order to validate the input, each validator will receive as an input
- # the value returned from the previous validator
- yield cls.validate
-
- @classmethod
- def validate(cls, v: Any) -> IcebergType:
- # When Pydantic is unable to determine the subtype
- # In this case we'll help pydantic a bit by parsing the
- # primitive type ourselves, or pointing it at the correct
- # complex type by looking at the type field
-
+ def handle_primitive_type(cls, v: Any, handler:
ValidatorFunctionWrapHandler) -> IcebergType:
+ # Pydantic works mostly around dicts, and there seems to be an issue
+ # with serializing into a RootModel; might revisit this.
if isinstance(v, str):
+ if v == "boolean":
+ return BooleanType()
+ elif v == "string":
+ return StringType()
+ elif v == "int":
+ return IntegerType()
+ elif v == "long":
+ return LongType()
+ if v == "float":
+ return FloatType()
+ if v == "double":
+ return DoubleType()
+ if v == "timestamp":
+ return TimestampType()
+ if v == "timestamptz":
+ return TimestamptzType()
+ if v == "date":
+ return DateType()
+ if v == "time":
+ return TimeType()
+ if v == "uuid":
+ return UUIDType()
+ if v == "binary":
+ return BinaryType()
+ if v.startswith("fixed"):
+ return FixedType(_parse_fixed_type(v))
if v.startswith("decimal"):
- return DecimalType.parse(v)
- elif v.startswith("fixed"):
- return FixedType.parse(v)
+ precision, scale = _parse_decimal_type(v)
+ return DecimalType(precision, scale)
else:
- return PRIMITIVE_TYPES[v]
- elif isinstance(v, dict):
- if v.get("type") == "struct":
- return StructType(**v)
- elif v.get("type") == "list":
+ raise ValueError(f"Unknown type: {v}")
+ if isinstance(v, dict) and cls == IcebergType:
+ complex_type = v.get("type")
+ if complex_type == "list":
return ListType(**v)
- elif v.get("type") == "map":
+ elif complex_type == "map":
return MapType(**v)
+ elif complex_type == "struct":
+ return StructType(**v)
else:
return NestedField(**v)
- else:
- return v
+ return handler(v)
@property
def is_primitive(self) -> bool:
@@ -107,10 +150,10 @@ class IcebergType(IcebergBaseModel, Singleton):
return isinstance(self, StructType)
-class PrimitiveType(IcebergType):
+class PrimitiveType(IcebergRootModel[str], IcebergType, Singleton):
"""Base class for all Iceberg Primitive Types."""
- __root__: str = Field()
+ root: Any = Field()
def __repr__(self) -> str:
"""Returns the string representation of the PrimitiveType class."""
@@ -118,7 +161,7 @@ class PrimitiveType(IcebergType):
def __str__(self) -> str:
"""Returns the string representation of the PrimitiveType class."""
- return self.__root__
+ return self.root
class FixedType(PrimitiveType):
@@ -133,32 +176,34 @@ class FixedType(PrimitiveType):
False
"""
- __root__: str = Field()
- _len: int = PrivateAttr()
+ root: int = Field()
- @staticmethod
- def parse(str_repr: str) -> FixedType:
- return FixedType(length=FIXED_PARSER.match(str_repr))
+ def __init__(self, length: int) -> None:
+ super().__init__(root=length)
- def __init__(self, length: int):
- super().__init__(__root__=f"fixed[{length}]")
- self._len = length
+ @model_serializer
+ def ser_model(self) -> str:
+ return f"fixed[{self.root}]"
def __len__(self) -> int:
"""Returns the length of an instance of the FixedType class."""
- return self._len
+ return self.root
+
+ def __str__(self) -> str:
+ """Returns the string representation."""
+ return f"fixed[{self.root}]"
def __repr__(self) -> str:
"""Returns the string representation of the FixedType class."""
- return f"FixedType(length={self._len})"
+ return f"FixedType(length={self.root})"
- def __getnewargs__(self) -> Tuple[int]:
+ def __getnewargs__(self) -> tuple[int]:
"""A magic function for pickling the FixedType class."""
- return (self._len,)
+ return (self.root,)
class DecimalType(PrimitiveType):
- """A fixed data type in Iceberg.
+ """A decimal data type in Iceberg.
Example:
>>> DecimalType(32, 3)
@@ -167,44 +212,45 @@ class DecimalType(PrimitiveType):
True
"""
- __root__: str = Field()
+ root: Tuple[int, int]
- _precision: int = PrivateAttr()
- _scale: int = PrivateAttr()
+ def __init__(self, precision: int, scale: int) -> None:
+ super().__init__(root=(precision, scale))
- @staticmethod
- def parse(str_repr: str) -> DecimalType:
- matches = DECIMAL_REGEX.search(str_repr)
- if matches:
- precision = int(matches.group(1))
- scale = int(matches.group(2))
- return DecimalType(precision, scale)
- else:
- raise ValueError(f"Could not parse {str_repr} into a DecimalType")
-
- def __init__(self, precision: int, scale: int):
- super().__init__(
- __root__=f"decimal({precision}, {scale})",
- )
- # assert precision < scale, "precision should be smaller than scale"
- self._precision = precision
- self._scale = scale
+ @model_serializer
+ def ser_model(self) -> str:
+ """Used when serialized to a string."""
+ return f"decimal({self.precision}, {self.scale})"
@property
def precision(self) -> int:
- return self._precision
+ """Returns the precision of the decimal."""
+ return self.root[0]
@property
def scale(self) -> int:
- return self._scale
+ """Returns the scale of the decimal."""
+ return self.root[1]
def __repr__(self) -> str:
"""Returns the string representation of the DecimalType class."""
- return f"DecimalType(precision={self._precision}, scale={self._scale})"
+ return f"DecimalType(precision={self.precision}, scale={self.scale})"
+
+ def __str__(self) -> str:
+ """Returns the string representation."""
+ return f"decimal({self.precision}, {self.scale})"
+
+ def __hash__(self) -> int:
+ """Returns the hash of the tuple."""
+ return hash(self.root)
def __getnewargs__(self) -> Tuple[int, int]:
"""A magic function for pickling the DecimalType class."""
- return (self._precision, self._scale)
+ return self.precision, self.scale
+
+ def __eq__(self, other: Any) -> bool:
+ """Compares to root to another object."""
+ return self.root == other.root if isinstance(other, DecimalType) else
False
class NestedField(IcebergType):
@@ -232,10 +278,10 @@ class NestedField(IcebergType):
field_id: int = Field(alias="id")
name: str = Field()
- field_type: IcebergType = Field(alias="type")
+ field_type: SerializeAsAny[IcebergType] = Field(alias="type")
required: bool = Field(default=True)
doc: Optional[str] = Field(default=None, repr=False)
- initial_default: Any = Field(alias="initial-default", repr=False)
+ initial_default: Optional[Any] = Field(alias="initial-default",
default=None, repr=False)
def __init__(
self,
@@ -249,12 +295,12 @@ class NestedField(IcebergType):
):
# We need an init when we want to use positional arguments, but
# need also to support the aliases.
- data["field_id"] = data["id"] if "id" in data else field_id
+ data["id"] = data["id"] if "id" in data else field_id
data["name"] = name
- data["field_type"] = data["type"] if "type" in data else field_type
+ data["type"] = data["type"] if "type" in data else field_type
data["required"] = required
data["doc"] = doc
- data["initial_default"] = initial_default
+ data["initial-default"] = initial_default
super().__init__(**data)
def __str__(self) -> str:
@@ -283,7 +329,7 @@ class StructType(IcebergType):
'struct<1: required_field: optional string, 2: optional_field:
optional int>'
"""
- type: Literal["struct"] = "struct"
+ type: Literal["struct"] = Field(default="struct")
fields: Tuple[NestedField, ...] = Field(default_factory=tuple)
_hash: int = PrivateAttr()
@@ -320,6 +366,10 @@ class StructType(IcebergType):
"""Used the cache hash value of the StructType class."""
return self._hash
+ def __eq__(self, other: Any) -> bool:
+ """Compares the object if it is equal to another object."""
+ return self.fields == other.fields if isinstance(other, StructType)
else False
+
class ListType(IcebergType):
"""A list type in Iceberg.
@@ -329,28 +379,30 @@ class ListType(IcebergType):
ListType(element_id=3, element_type=StringType(),
element_required=True)
"""
- class Config:
- fields = {"element_field": {"exclude": True}}
-
- type: Literal["list"] = "list"
+ type: Literal["list"] = Field(default="list")
element_id: int = Field(alias="element-id")
- element_type: IcebergType = Field(alias="element")
+ element_type: SerializeAsAny[IcebergType] = Field(alias="element")
element_required: bool = Field(alias="element-required", default=True)
- element_field: NestedField = Field(init=False, repr=False)
+ _element_field: NestedField = PrivateAttr()
+ _hash: int = PrivateAttr()
def __init__(
self, element_id: Optional[int] = None, element: Optional[IcebergType]
= None, element_required: bool = True, **data: Any
):
- data["element_id"] = data["element-id"] if "element-id" in data else
element_id
- data["element_type"] = element or data["element_type"]
- data["element_required"] = data["element-required"] if
"element-required" in data else element_required
- data["element_field"] = NestedField(
+ data["element-id"] = data["element-id"] if "element-id" in data else
element_id
+ data["element"] = element or data["element_type"]
+ data["element-required"] = data["element-required"] if
"element-required" in data else element_required
+ super().__init__(**data)
+ self._hash = hash(data.values())
+
+ @cached_property
+ def element_field(self) -> NestedField:
+ return NestedField(
name="element",
- required=data["element_required"],
- field_id=data["element_id"],
- field_type=data["element_type"],
+ field_id=self.element_id,
+ field_type=self.element_type,
+ required=self.element_required,
)
- super().__init__(**data)
def __str__(self) -> str:
"""Returns the string representation of the ListType class."""
@@ -360,6 +412,14 @@ class ListType(IcebergType):
"""A magic function for pickling the ListType class."""
return (self.element_id, self.element_type, self.element_required)
+ def __hash__(self) -> int:
+ """Used the cache hash value of the StructType class."""
+ return self._hash
+
+ def __eq__(self, other: Any) -> bool:
+ """Compares the list type to another list type."""
+ return self.element_field == other.element_field if isinstance(other,
ListType) else False
+
class MapType(IcebergType):
"""A map type in Iceberg.
@@ -369,17 +429,13 @@ class MapType(IcebergType):
MapType(key_id=1, key_type=StringType(), value_id=2,
value_type=IntegerType(), value_required=True)
"""
- type: Literal["map"] = "map"
+ type: Literal["map"] = Field(default="map")
key_id: int = Field(alias="key-id")
- key_type: IcebergType = Field(alias="key")
+ key_type: SerializeAsAny[IcebergType] = Field(alias="key")
value_id: int = Field(alias="value-id")
- value_type: IcebergType = Field(alias="value")
+ value_type: SerializeAsAny[IcebergType] = Field(alias="value")
value_required: bool = Field(alias="value-required", default=True)
- key_field: NestedField = Field(init=False, repr=False)
- value_field: NestedField = Field(init=False, repr=False)
-
- class Config:
- fields = {"key_field": {"exclude": True}, "value_field": {"exclude":
True}}
+ _hash: int = PrivateAttr()
def __init__(
self,
@@ -390,17 +446,31 @@ class MapType(IcebergType):
value_required: bool = True,
**data: Any,
):
- data["key_id"] = key_id or data["key-id"]
- data["key_type"] = key_type or data["key"]
- data["value_id"] = value_id or data["value-id"]
- data["value_type"] = value_type or data["value"]
- data["value_required"] = value_required if value_required is not None
else data["value_required"]
-
- data["key_field"] = NestedField(name="key", field_id=data["key_id"],
field_type=data["key_type"], required=True)
- data["value_field"] = NestedField(
- name="value", field_id=data["value_id"],
field_type=data["value_type"], required=data["value_required"]
- )
+ data["key-id"] = data["key-id"] if "key-id" in data else key_id
+ data["key"] = data["key"] if "key" in data else key_type
+ data["value-id"] = data["value-id"] if "value-id" in data else value_id
+ data["value"] = data["value"] if "value" in data else value_type
+ data["value-required"] = data["value-required"] if "value-required" in
data else value_required
super().__init__(**data)
+ self._hash = hash(self.__getnewargs__())
+
+ @cached_property
+ def key_field(self) -> NestedField:
+ return NestedField(
+ name="key",
+ field_id=self.key_id,
+ field_type=self.key_type,
+ required=True,
+ )
+
+ @cached_property
+ def value_field(self) -> NestedField:
+ return NestedField(
+ name="value",
+ field_id=self.value_id,
+ field_type=self.value_type,
+ required=self.value_required,
+ )
def __str__(self) -> str:
"""Returns the string representation of the MapType class."""
@@ -410,6 +480,16 @@ class MapType(IcebergType):
"""A magic function for pickling the MapType class."""
return (self.key_id, self.key_type, self.value_id, self.value_type,
self.value_required)
+ def __hash__(self) -> int:
+ """Returns the hash of the MapType."""
+ return self._hash
+
+ def __eq__(self, other: Any) -> bool:
+ """Compares the MapType to another object."""
+ return (
+ self.key_field == other.key_field and self.value_field ==
other.value_field if isinstance(other, MapType) else False
+ )
+
class BooleanType(PrimitiveType):
"""A boolean data type in Iceberg can be represented using an instance of
this class.
@@ -422,7 +502,7 @@ class BooleanType(PrimitiveType):
BooleanType()
"""
- __root__ = "boolean"
+ root: Literal["boolean"] = Field(default="boolean")
class IntegerType(PrimitiveType):
@@ -442,11 +522,11 @@ class IntegerType(PrimitiveType):
in Java (returns `-2147483648`)
"""
+ root: Literal["int"] = Field(default="int")
+
max: ClassVar[int] = 2147483647
min: ClassVar[int] = -2147483648
- __root__ = "int"
-
class LongType(PrimitiveType):
"""A Long data type in Iceberg can be represented using an instance of
this class.
@@ -469,11 +549,11 @@ class LongType(PrimitiveType):
in Java (returns `-9223372036854775808`)
"""
+ root: Literal["long"] = Field(default="long")
+
max: ClassVar[int] = 9223372036854775807
min: ClassVar[int] = -9223372036854775808
- __root__ = "long"
-
class FloatType(PrimitiveType):
"""A Float data type in Iceberg can be represented using an instance of
this class.
@@ -497,7 +577,7 @@ class FloatType(PrimitiveType):
max: ClassVar[float] = 3.4028235e38
min: ClassVar[float] = -3.4028235e38
- __root__ = "float"
+ root: Literal["float"] = Field(default="float")
class DoubleType(PrimitiveType):
@@ -513,7 +593,7 @@ class DoubleType(PrimitiveType):
DoubleType()
"""
- __root__ = "double"
+ root: Literal["double"] = Field(default="double")
class DateType(PrimitiveType):
@@ -529,7 +609,7 @@ class DateType(PrimitiveType):
DateType()
"""
- __root__ = "date"
+ root: Literal["date"] = Field(default="date")
class TimeType(PrimitiveType):
@@ -545,7 +625,7 @@ class TimeType(PrimitiveType):
TimeType()
"""
- __root__ = "time"
+ root: Literal["time"] = Field(default="time")
class TimestampType(PrimitiveType):
@@ -561,7 +641,7 @@ class TimestampType(PrimitiveType):
TimestampType()
"""
- __root__ = "timestamp"
+ root: Literal["timestamp"] = Field(default="timestamp")
class TimestamptzType(PrimitiveType):
@@ -577,7 +657,7 @@ class TimestamptzType(PrimitiveType):
TimestamptzType()
"""
- __root__ = "timestamptz"
+ root: Literal["timestamptz"] = Field(default="timestamptz")
class StringType(PrimitiveType):
@@ -593,7 +673,7 @@ class StringType(PrimitiveType):
StringType()
"""
- __root__ = "string"
+ root: Literal["string"] = Field(default="string")
class UUIDType(PrimitiveType):
@@ -609,7 +689,7 @@ class UUIDType(PrimitiveType):
UUIDType()
"""
- __root__ = "uuid"
+ root: Literal["uuid"] = Field(default="uuid")
class BinaryType(PrimitiveType):
@@ -625,20 +705,4 @@ class BinaryType(PrimitiveType):
BinaryType()
"""
- __root__ = "binary"
-
-
-PRIMITIVE_TYPES: Dict[str, PrimitiveType] = {
- "boolean": BooleanType(),
- "int": IntegerType(),
- "long": LongType(),
- "float": FloatType(),
- "double": DoubleType(),
- "date": DateType(),
- "time": TimeType(),
- "timestamp": TimestampType(),
- "timestamptz": TimestamptzType(),
- "string": StringType(),
- "uuid": UUIDType(),
- "binary": BinaryType(),
-}
+ root: Literal["binary"] = Field(default="binary")
diff --git a/python/pyiceberg/utils/parsing.py
b/python/pyiceberg/utils/parsing.py
index 22786a1d68..200904fd97 100644
--- a/python/pyiceberg/utils/parsing.py
+++ b/python/pyiceberg/utils/parsing.py
@@ -17,6 +17,8 @@
import re
from re import Pattern
+from pyiceberg.exceptions import ValidationError
+
class ParseNumberFromBrackets:
"""Extracts the size from a string in the form of prefix[22]."""
@@ -32,4 +34,4 @@ class ParseNumberFromBrackets:
matches = self.regex.search(str_repr)
if matches:
return int(matches.group(1))
- raise ValueError(f"Could not match {str_repr}, expected format
{self.prefix}[22]")
+ raise ValidationError(f"Could not match {str_repr}, expected format
{self.prefix}[22]")
diff --git a/python/pyproject.toml b/python/pyproject.toml
index e27e10da3f..2c3c52fa4e 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -51,7 +51,7 @@ requests = ">=2.20.0,<3.0.0"
click = ">=7.1.1,<9.0.0"
rich = ">=10.11.0,<14.0.0"
strictyaml = ">=1.7.0,<2.0.0" # CVE-2020-14343 was fixed in 5.4.
-pydantic = ">=1.9.0,<2.0.0"
+pydantic = ">=2.0,<3.0"
sortedcontainers = "2.4.0"
fsspec = ">=2021.09.0,<2024.1.0" # `lexists()` was implemented in 2021.09.0.
Upper bound set arbitrarily, to be reassessed in early 2024.
pyparsing = ">=3.1.0,<4.0.0"
@@ -162,6 +162,10 @@ ignore_missing_imports = true
module = "pydantic.*"
ignore_missing_imports = true
+[[tool.mypy.overrides]]
+module = "pydantic_core.*"
+ignore_missing_imports = true
+
[[tool.mypy.overrides]]
module = "pytest.*"
ignore_missing_imports = true
diff --git a/python/tests/avro/test_reader.py b/python/tests/avro/test_reader.py
index 36d148e8b3..99a6499bf5 100644
--- a/python/tests/avro/test_reader.py
+++ b/python/tests/avro/test_reader.py
@@ -329,7 +329,7 @@ def test_binary_reader() -> None:
def test_unknown_type() -> None:
class UnknownType(PrimitiveType):
- __root__ = "UnknownType"
+ root: str = "UnknownType"
with pytest.raises(ValueError) as exc_info:
construct_reader(UnknownType())
diff --git a/python/tests/avro/test_writer.py b/python/tests/avro/test_writer.py
index 2cdcd4482a..991d9d1ae7 100644
--- a/python/tests/avro/test_writer.py
+++ b/python/tests/avro/test_writer.py
@@ -128,7 +128,7 @@ def test_binary_writer() -> None:
def test_unknown_type() -> None:
class UnknownType(PrimitiveType):
- __root__ = "UnknownType"
+ root: str = "UnknownType"
with pytest.raises(ValueError) as exc_info:
construct_writer(UnknownType())
diff --git a/python/tests/catalog/test_hive.py
b/python/tests/catalog/test_hive.py
index ea40289618..ef4757cbf4 100644
--- a/python/tests/catalog/test_hive.py
+++ b/python/tests/catalog/test_hive.py
@@ -290,7 +290,7 @@ def test_create_table(table_schema_simple: Schema,
hive_database: HiveDatabase,
last_sequence_number=0,
)
- assert metadata.dict() == expected.dict()
+ assert metadata.model_dump() == expected.model_dump()
def test_load_table(hive_table: HiveTable) -> None:
diff --git a/python/tests/catalog/test_rest.py
b/python/tests/catalog/test_rest.py
index a7663ac511..829611e988 100644
--- a/python/tests/catalog/test_rest.py
+++ b/python/tests/catalog/test_rest.py
@@ -440,7 +440,7 @@ def test_load_table_200(rest_mock: Mocker) -> None:
manifest_list="s3://warehouse/database/table/metadata/snap-3497810964824022504-1-c4f68204-666b-4e50-a9df-b10c34bf6b82.avro",
summary=Summary(
operation=Operation.APPEND,
- **{ # type: ignore
+ **{
"spark.app.id": "local-1646787004168",
"added-data-files": "1",
"added-records": "1",
@@ -487,6 +487,8 @@ def test_load_table_200(rest_mock: Mocker) -> None:
io=load_file_io(),
catalog=catalog,
)
+ # First compare the dicts
+ assert actual.metadata.model_dump() == expected.metadata.model_dump()
assert actual == expected
@@ -588,7 +590,7 @@ def test_create_table_200(rest_mock: Mocker,
table_schema_simple: Schema) -> Non
request_headers=TEST_HEADERS,
)
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN)
- table = catalog.create_table(
+ actual = catalog.create_table(
identifier=("fokko", "fokko2"),
schema=table_schema_simple,
location=None,
@@ -598,7 +600,7 @@ def test_create_table_200(rest_mock: Mocker,
table_schema_simple: Schema) -> Non
sort_order=SortOrder(SortField(source_id=2,
transform=IdentityTransform())),
properties={"owner": "fokko"},
)
- assert table == Table(
+ expected = Table(
identifier=("rest", "fokko", "fokko2"),
metadata_location="s3://warehouse/database/table/metadata.json",
metadata=TableMetadataV1(
@@ -644,6 +646,7 @@ def test_create_table_200(rest_mock: Mocker,
table_schema_simple: Schema) -> Non
io=load_file_io(),
catalog=catalog,
)
+ assert actual == expected
def test_create_table_409(rest_mock: Mocker, table_schema_simple: Schema) ->
None:
diff --git a/python/tests/cli/test_console.py b/python/tests/cli/test_console.py
index 31c062a228..12c82c2cde 100644
--- a/python/tests/cli/test_console.py
+++ b/python/tests/cli/test_console.py
@@ -558,7 +558,7 @@ def test_json_describe_table(catalog: InMemoryCatalog) ->
None:
assert result.exit_code == 0
assert (
result.output
- == """{"identifier": ["default", "my_table"], "metadata_location":
"s3://warehouse/default/my_table/metadata/metadata.json", "metadata":
{"location": "s3://bucket/test/location", "table-uuid":
"d20125c8-7284-442c-9aea-15fee620737c", "last-updated-ms": 1602638573874,
"last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1,
"name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type":
"long", "required": true, "doc": "comment"}, {"id": 3, "name": "z [...]
+ ==
"""{"identifier":["default","my_table"],"metadata_location":"s3://warehouse/default/my_table/metadata/metadata.json","metadata":{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"sche
[...]
)
@@ -587,7 +587,7 @@ def test_json_schema(catalog: InMemoryCatalog) -> None:
assert result.exit_code == 0
assert (
result.output
- == """{"type": "struct", "fields": [{"id": 1, "name": "x", "type":
"long", "required": true}, {"id": 2, "name": "y", "type": "long", "required":
true, "doc": "comment"}, {"id": 3, "name": "z", "type": "long", "required":
true}], "schema-id": 0, "identifier-field-ids": []}\n"""
+ ==
"""{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}\n"""
)
@@ -611,10 +611,7 @@ def test_json_spec(catalog: InMemoryCatalog) -> None:
runner = CliRunner()
result = runner.invoke(run, ["--output=json", "spec", "default.my_table"])
assert result.exit_code == 0
- assert (
- result.output
- == """{"spec-id": 0, "fields": [{"source-id": 1, "field-id": 1000,
"transform": "identity", "name": "x"}]}\n"""
- )
+ assert result.output ==
"""{"spec-id":0,"fields":[{"source-id":1,"field-id":1000,"transform":"identity","name":"x"}]}\n"""
def test_json_spec_does_not_exists(catalog: InMemoryCatalog) -> None:
diff --git a/python/tests/conftest.py b/python/tests/conftest.py
index e0e2edc0b8..9a560284ea 100644
--- a/python/tests/conftest.py
+++ b/python/tests/conftest.py
@@ -311,7 +311,7 @@ EXAMPLE_TABLE_METADATA_V2 = {
],
}
],
- "properties": {"read.split.target.size": 134217728},
+ "properties": {"read.split.target.size": "134217728"},
"current-snapshot-id": 3055729675574597004,
"snapshots": [
{
@@ -1548,7 +1548,7 @@ def clean_up(test_catalog: Catalog) -> None:
def data_file(table_schema_simple: Schema, tmp_path: str) -> str:
table = pa.table(
{"foo": ["a", "b", "c"], "bar": [1, 2, 3], "baz": [True, False, None]},
- metadata={"iceberg.schema": table_schema_simple.json()},
+ metadata={"iceberg.schema": table_schema_simple.model_dump_json()},
)
file_path = f"{tmp_path}/0000-data.parquet"
diff --git a/python/tests/io/test_pyarrow.py b/python/tests/io/test_pyarrow.py
index 0d0719c152..dab47c2492 100644
--- a/python/tests/io/test_pyarrow.py
+++ b/python/tests/io/test_pyarrow.py
@@ -708,7 +708,7 @@ def _write_table_to_file(filepath: str, schema: pa.Schema,
table: pa.Table) -> s
@pytest.fixture
def file_int(schema_int: Schema, tmpdir: str) -> str:
- pyarrow_schema = pa.schema(schema_to_pyarrow(schema_int),
metadata={"iceberg.schema": schema_int.json()})
+ pyarrow_schema = pa.schema(schema_to_pyarrow(schema_int),
metadata={"iceberg.schema": schema_int.model_dump_json()})
return _write_table_to_file(
f"file:{tmpdir}/a.parquet", pyarrow_schema,
pa.Table.from_arrays([pa.array([0, 1, 2])], schema=pyarrow_schema)
)
@@ -716,7 +716,7 @@ def file_int(schema_int: Schema, tmpdir: str) -> str:
@pytest.fixture
def file_int_str(schema_int_str: Schema, tmpdir: str) -> str:
- pyarrow_schema = pa.schema(schema_to_pyarrow(schema_int_str),
metadata={"iceberg.schema": schema_int_str.json()})
+ pyarrow_schema = pa.schema(schema_to_pyarrow(schema_int_str),
metadata={"iceberg.schema": schema_int_str.model_dump_json()})
return _write_table_to_file(
f"file:{tmpdir}/a.parquet",
pyarrow_schema,
@@ -726,7 +726,7 @@ def file_int_str(schema_int_str: Schema, tmpdir: str) ->
str:
@pytest.fixture
def file_string(schema_str: Schema, tmpdir: str) -> str:
- pyarrow_schema = pa.schema(schema_to_pyarrow(schema_str),
metadata={"iceberg.schema": schema_str.json()})
+ pyarrow_schema = pa.schema(schema_to_pyarrow(schema_str),
metadata={"iceberg.schema": schema_str.model_dump_json()})
return _write_table_to_file(
f"file:{tmpdir}/b.parquet", pyarrow_schema,
pa.Table.from_arrays([pa.array(["0", "1", "2"])], schema=pyarrow_schema)
)
@@ -734,7 +734,7 @@ def file_string(schema_str: Schema, tmpdir: str) -> str:
@pytest.fixture
def file_long(schema_long: Schema, tmpdir: str) -> str:
- pyarrow_schema = pa.schema(schema_to_pyarrow(schema_long),
metadata={"iceberg.schema": schema_long.json()})
+ pyarrow_schema = pa.schema(schema_to_pyarrow(schema_long),
metadata={"iceberg.schema": schema_long.model_dump_json()})
return _write_table_to_file(
f"file:{tmpdir}/c.parquet", pyarrow_schema,
pa.Table.from_arrays([pa.array([0, 1, 2])], schema=pyarrow_schema)
)
@@ -742,7 +742,7 @@ def file_long(schema_long: Schema, tmpdir: str) -> str:
@pytest.fixture
def file_struct(schema_struct: Schema, tmpdir: str) -> str:
- pyarrow_schema = pa.schema(schema_to_pyarrow(schema_struct),
metadata={"iceberg.schema": schema_struct.json()})
+ pyarrow_schema = pa.schema(schema_to_pyarrow(schema_struct),
metadata={"iceberg.schema": schema_struct.model_dump_json()})
return _write_table_to_file(
f"file:{tmpdir}/d.parquet",
pyarrow_schema,
@@ -759,7 +759,7 @@ def file_struct(schema_struct: Schema, tmpdir: str) -> str:
@pytest.fixture
def file_list(schema_list: Schema, tmpdir: str) -> str:
- pyarrow_schema = pa.schema(schema_to_pyarrow(schema_list),
metadata={"iceberg.schema": schema_list.json()})
+ pyarrow_schema = pa.schema(schema_to_pyarrow(schema_list),
metadata={"iceberg.schema": schema_list.model_dump_json()})
return _write_table_to_file(
f"file:{tmpdir}/e.parquet",
pyarrow_schema,
@@ -777,7 +777,7 @@ def file_list(schema_list: Schema, tmpdir: str) -> str:
@pytest.fixture
def file_list_of_structs(schema_list_of_structs: Schema, tmpdir: str) -> str:
pyarrow_schema = pa.schema(
- schema_to_pyarrow(schema_list_of_structs), metadata={"iceberg.schema":
schema_list_of_structs.json()}
+ schema_to_pyarrow(schema_list_of_structs), metadata={"iceberg.schema":
schema_list_of_structs.model_dump_json()}
)
return _write_table_to_file(
f"file:{tmpdir}/e.parquet",
@@ -795,7 +795,7 @@ def file_list_of_structs(schema_list_of_structs: Schema,
tmpdir: str) -> str:
@pytest.fixture
def file_map(schema_map: Schema, tmpdir: str) -> str:
- pyarrow_schema = pa.schema(schema_to_pyarrow(schema_map),
metadata={"iceberg.schema": schema_map.json()})
+ pyarrow_schema = pa.schema(schema_to_pyarrow(schema_map),
metadata={"iceberg.schema": schema_map.model_dump_json()})
return _write_table_to_file(
f"file:{tmpdir}/e.parquet",
pyarrow_schema,
diff --git a/python/tests/table/test_init.py b/python/tests/table/test_init.py
index 87aa0d3815..2587fb76d9 100644
--- a/python/tests/table/test_init.py
+++ b/python/tests/table/test_init.py
@@ -37,7 +37,12 @@ from pyiceberg.manifest import (
)
from pyiceberg.partitioning import PartitionField, PartitionSpec
from pyiceberg.schema import Schema
-from pyiceberg.table import StaticTable, Table, _match_deletes_to_datafile
+from pyiceberg.table import (
+ SetPropertiesUpdate,
+ StaticTable,
+ Table,
+ _match_deletes_to_datafile,
+)
from pyiceberg.table.metadata import INITIAL_SEQUENCE_NUMBER, TableMetadataV2
from pyiceberg.table.snapshots import (
Operation,
@@ -379,3 +384,7 @@ def test_match_deletes_to_datafile_duplicate_number() ->
None:
delete_entry_1.data_file,
delete_entry_2.data_file,
}
+
+
+def test_serialize_set_properties_updates() -> None:
+ assert SetPropertiesUpdate(updates={"abc": "🤪"}).model_dump_json() ==
"""{"action":"set-properties","updates":{"abc":"🤪"}}"""
diff --git a/python/tests/table/test_metadata.py
b/python/tests/table/test_metadata.py
index 3ebf49e0e0..2273843645 100644
--- a/python/tests/table/test_metadata.py
+++ b/python/tests/table/test_metadata.py
@@ -18,6 +18,7 @@
import io
import json
+from copy import copy
from typing import Any, Dict
from unittest.mock import MagicMock, patch
from uuid import UUID
@@ -30,7 +31,6 @@ from pyiceberg.schema import Schema
from pyiceberg.serializers import FromByteStream
from pyiceberg.table import SortOrder
from pyiceberg.table.metadata import (
- TableMetadataFactory,
TableMetadataUtil,
TableMetadataV1,
TableMetadataV2,
@@ -50,7 +50,6 @@ from pyiceberg.types import (
StringType,
StructType,
)
-from tests.conftest import EXAMPLE_TABLE_METADATA_V2
EXAMPLE_TABLE_METADATA_V1 = {
"format-version": 1,
@@ -98,19 +97,6 @@ def test_from_dict_v2_parse_raw(example_table_metadata_v2:
Dict[str, Any]) -> No
TableMetadataUtil.parse_raw(json.dumps(example_table_metadata_v2))
[email protected](
- "table_metadata, expected_version",
- [
- (EXAMPLE_TABLE_METADATA_V1, 1),
- (EXAMPLE_TABLE_METADATA_V2, 2),
- ],
-)
-def test_table_metadata_factory(table_metadata: Dict[str, Any],
expected_version: int) -> None:
- """Test initialization of a TableMetadataFactory instance"""
- factory = TableMetadataFactory(table_metadata=table_metadata)
- assert factory.table_metadata.format_version == expected_version
-
-
def test_from_byte_stream(example_table_metadata_v2: Dict[str, Any]) -> None:
"""Test generating a TableMetadata instance from a file-like byte stream"""
data = bytes(json.dumps(example_table_metadata_v2), encoding="utf-8")
@@ -120,7 +106,7 @@ def test_from_byte_stream(example_table_metadata_v2:
Dict[str, Any]) -> None:
def test_v2_metadata_parsing(example_table_metadata_v2: Dict[str, Any]) ->
None:
"""Test retrieving values from a TableMetadata instance of version 2"""
- table_metadata =
TableMetadataFactory(table_metadata=example_table_metadata_v2).table_metadata
+ table_metadata = TableMetadataUtil.parse_obj(example_table_metadata_v2)
assert table_metadata.format_version == 2
assert table_metadata.table_uuid ==
UUID("9c12d441-03fe-4693-9a96-a0705ddf69c1")
@@ -195,7 +181,7 @@ def test_updating_metadata(example_table_metadata_v2:
Dict[str, Any]) -> None:
],
}
- mutable_table_metadata = table_metadata.dict()
+ mutable_table_metadata = table_metadata.model_dump()
mutable_table_metadata["schemas"].append(new_schema)
mutable_table_metadata["current-schema-id"] = 1
@@ -207,14 +193,14 @@ def test_updating_metadata(example_table_metadata_v2:
Dict[str, Any]) -> None:
def test_serialize_v1(example_table_metadata_v1: Dict[str, Any]) -> None:
table_metadata = TableMetadataV1(**example_table_metadata_v1)
- table_metadata_json = table_metadata.json()
- expected = """{"location": "s3://bucket/test/location", "table-uuid":
"d20125c8-7284-442c-9aea-15fee620737c", "last-updated-ms": 1602638573874,
"last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1,
"name": "x", "type": "long", "required": true}, {"id": 2, "name": "y", "type":
"long", "required": true, "doc": "comment"}, {"id": 3, "name": "z", "type":
"long", "required": true}], "schema-id": 0, "identifier-field-ids": []}],
"current-schema-id": 0, "partition-specs": [...]
+ table_metadata_json = table_metadata.model_dump_json()
+ expected =
"""{"location":"s3://bucket/test/location","table-uuid":"d20125c8-7284-442c-9aea-15fee620737c","last-updated-ms":1602638573874,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]}],"current-schema-id":0,"partition-specs":[{"spec-id":0,"fields":[{"source-id":1,"fiel
[...]
assert table_metadata_json == expected
def test_serialize_v2(example_table_metadata_v2: Dict[str, Any]) -> None:
- table_metadata = TableMetadataV2(**example_table_metadata_v2).json()
- expected = """{"location": "s3://bucket/test/location", "table-uuid":
"9c12d441-03fe-4693-9a96-a0705ddf69c1", "last-updated-ms": 1602638573590,
"last-column-id": 3, "schemas": [{"type": "struct", "fields": [{"id": 1,
"name": "x", "type": "long", "required": true}], "schema-id": 0,
"identifier-field-ids": []}, {"type": "struct", "fields": [{"id": 1, "name":
"x", "type": "long", "required": true}, {"id": 2, "name": "y", "type": "long",
"required": true, "doc": "comment"}, {"id": 3, "na [...]
+ table_metadata =
TableMetadataV2(**example_table_metadata_v2).model_dump_json()
+ expected =
"""{"location":"s3://bucket/test/location","table-uuid":"9c12d441-03fe-4693-9a96-a0705ddf69c1","last-updated-ms":1602638573590,"last-column-id":3,"schemas":[{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true}],"schema-id":0,"identifier-field-ids":[]},{"type":"struct","fields":[{"id":1,"name":"x","type":"long","required":true},{"id":2,"name":"y","type":"long","required":true,"doc":"comment"},{"id":3,"name":"z","type":"long","required":true}],"schema
[...]
assert table_metadata == expected
@@ -237,32 +223,16 @@ def
test_migrate_v1_partition_specs(example_table_metadata_v1: Dict[str, Any]) -
]
-def test_invalid_format_version() -> None:
+def test_invalid_format_version(example_table_metadata_v1: Dict[str, Any]) ->
None:
"""Test the exception when trying to load an unknown version"""
- table_metadata_invalid_format_version = {
- "format-version": -1,
- "table-uuid": "d20125c8-7284-442c-9aea-15fee620737c",
- "location": "s3://bucket/test/location",
- "last-updated-ms": 1602638573874,
- "last-column-id": 3,
- "schema": {
- "type": "struct",
- "fields": [
- {"id": 1, "name": "x", "required": True, "type": "long"},
- {"id": 2, "name": "y", "required": True, "type": "long",
"doc": "comment"},
- {"id": 3, "name": "z", "required": True, "type": "long"},
- ],
- },
- "partition-spec": [{"name": "x", "transform": "identity", "source-id":
1, "field-id": 1000}],
- "properties": {},
- "current-snapshot-id": -1,
- "snapshots": [],
- }
+
+ example_table_metadata_v22 = copy(example_table_metadata_v1)
+ example_table_metadata_v22["format-version"] = -1
with pytest.raises(ValidationError) as exc_info:
-
TableMetadataUtil.parse_raw(json.dumps(table_metadata_invalid_format_version))
+ TableMetadataUtil.parse_raw(json.dumps(example_table_metadata_v22))
- assert "No match for discriminator 'format_version' and value -1 (allowed
values: 1, 2)" in str(exc_info.value)
+ assert "Input tag '-1' found using 'format_version'" in str(exc_info.value)
def test_current_schema_not_found() -> None:
@@ -430,7 +400,7 @@ def test_v1_writing_metadata(example_table_metadata_v1:
Dict[str, Any]) -> None:
"""
table_metadata = TableMetadataV1(**example_table_metadata_v1)
- metadata_v1_json = table_metadata.json()
+ metadata_v1_json = table_metadata.model_dump_json()
metadata_v1 = json.loads(metadata_v1_json)
assert "last-sequence-number" not in metadata_v1
@@ -487,7 +457,7 @@ def test_v1_write_metadata_for_v2() -> None:
}
table_metadata = TableMetadataV1(**minimal_example_v1).to_v2()
- metadata_v2_json = table_metadata.json()
+ metadata_v2_json = table_metadata.model_dump_json()
metadata_v2 = json.loads(metadata_v2_json)
assert metadata_v2["last-sequence-number"] == 0
@@ -740,4 +710,4 @@ def test_make_metadata_fresh() -> None:
last_sequence_number=0,
)
- assert actual.dict() == expected.dict()
+ assert actual.model_dump() == expected.model_dump()
diff --git a/python/tests/table/test_partitioning.py
b/python/tests/table/test_partitioning.py
index b2d78c49ba..cb60c9a8e5 100644
--- a/python/tests/table/test_partitioning.py
+++ b/python/tests/table/test_partitioning.py
@@ -84,7 +84,7 @@ def test_unpartitioned() -> None:
def test_serialize_unpartitioned_spec() -> None:
- assert UNPARTITIONED_PARTITION_SPEC.json() == """{"spec-id": 0, "fields":
[]}"""
+ assert UNPARTITIONED_PARTITION_SPEC.model_dump_json() ==
"""{"spec-id":0,"fields":[]}"""
def test_serialize_partition_spec() -> None:
@@ -94,15 +94,22 @@ def test_serialize_partition_spec() -> None:
spec_id=3,
)
assert (
- partitioned.json()
- == """{"spec-id": 3, "fields": [{"source-id": 1, "field-id": 1000,
"transform": "truncate[19]", "name": "str_truncate"}, {"source-id": 2,
"field-id": 1001, "transform": "bucket[25]", "name": "int_bucket"}]}"""
+ partitioned.model_dump_json()
+ ==
"""{"spec-id":3,"fields":[{"source-id":1,"field-id":1000,"transform":"truncate[19]","name":"str_truncate"},{"source-id":2,"field-id":1001,"transform":"bucket[25]","name":"int_bucket"}]}"""
)
+def test_deserialize_unpartition_spec() -> None:
+ json_partition_spec = """{"spec-id":0,"fields":[]}"""
+ spec = PartitionSpec.model_validate_json(json_partition_spec)
+
+ assert spec == PartitionSpec(spec_id=0)
+
+
def test_deserialize_partition_spec() -> None:
json_partition_spec = """{"spec-id": 3, "fields": [{"source-id": 1,
"field-id": 1000, "transform": "truncate[19]", "name": "str_truncate"},
{"source-id": 2, "field-id": 1001, "transform": "bucket[25]", "name":
"int_bucket"}]}"""
- spec = PartitionSpec.parse_raw(json_partition_spec)
+ spec = PartitionSpec.model_validate_json(json_partition_spec)
assert spec == PartitionSpec(
PartitionField(source_id=1, field_id=1000,
transform=TruncateTransform(width=19), name="str_truncate"),
diff --git a/python/tests/table/test_snapshots.py
b/python/tests/table/test_snapshots.py
index b119ae9945..625cbc1b6c 100644
--- a/python/tests/table/test_snapshots.py
+++ b/python/tests/table/test_snapshots.py
@@ -47,17 +47,18 @@ def snapshot_with_properties() -> Snapshot:
def test_serialize_summary() -> None:
- assert Summary(Operation.APPEND).json() == """{"operation": "append"}"""
+ assert Summary(Operation.APPEND).model_dump_json() ==
"""{"operation":"append"}"""
def test_serialize_summary_with_properties() -> None:
- assert Summary(Operation.APPEND, property="yes").json() ==
"""{"operation": "append", "property": "yes"}"""
+ summary = Summary(Operation.APPEND, property="yes")
+ assert summary.model_dump_json() ==
"""{"operation":"append","property":"yes"}"""
def test_serialize_snapshot(snapshot: Snapshot) -> None:
assert (
- snapshot.json()
- == """{"snapshot-id": 25, "parent-snapshot-id": 19, "sequence-number":
200, "timestamp-ms": 1602638573590, "manifest-list": "s3:/a/b/c.avro",
"summary": {"operation": "append"}, "schema-id": 3}"""
+ snapshot.model_dump_json()
+ ==
"""{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append"},"schema-id":3}"""
)
@@ -70,38 +71,38 @@ def test_serialize_snapshot_without_sequence_number() ->
None:
summary=Summary(Operation.APPEND),
schema_id=3,
)
- actual = snapshot.json()
- expected = """{"snapshot-id": 25, "parent-snapshot-id": 19,
"timestamp-ms": 1602638573590, "manifest-list": "s3:/a/b/c.avro", "summary":
{"operation": "append"}, "schema-id": 3}"""
+ actual = snapshot.model_dump_json()
+ expected =
"""{"snapshot-id":25,"parent-snapshot-id":19,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append"},"schema-id":3}"""
assert actual == expected
def test_serialize_snapshot_with_properties(snapshot_with_properties:
Snapshot) -> None:
assert (
- snapshot_with_properties.json()
- == """{"snapshot-id": 25, "parent-snapshot-id": 19, "sequence-number":
200, "timestamp-ms": 1602638573590, "manifest-list": "s3:/a/b/c.avro",
"summary": {"operation": "append", "foo": "bar"}, "schema-id": 3}"""
+ snapshot_with_properties.model_dump_json()
+ ==
"""{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append","foo":"bar"},"schema-id":3}"""
)
def test_deserialize_summary() -> None:
- summary = Summary.parse_raw("""{"operation": "append"}""")
+ summary = Summary.model_validate_json("""{"operation": "append"}""")
assert summary.operation == Operation.APPEND
def test_deserialize_summary_with_properties() -> None:
- summary = Summary.parse_raw("""{"operation": "append", "property":
"yes"}""")
+ summary = Summary.model_validate_json("""{"operation": "append",
"property": "yes"}""")
assert summary.operation == Operation.APPEND
assert summary.additional_properties == {"property": "yes"}
def test_deserialize_snapshot(snapshot: Snapshot) -> None:
payload = """{"snapshot-id": 25, "parent-snapshot-id": 19,
"sequence-number": 200, "timestamp-ms": 1602638573590, "manifest-list":
"s3:/a/b/c.avro", "summary": {"operation": "append"}, "schema-id": 3}"""
- actual = Snapshot.parse_raw(payload)
+ actual = Snapshot.model_validate_json(payload)
assert actual == snapshot
def test_deserialize_snapshot_with_properties(snapshot_with_properties:
Snapshot) -> None:
- payload = """{"snapshot-id": 25, "parent-snapshot-id": 19,
"sequence-number": 200, "timestamp-ms": 1602638573590, "manifest-list":
"s3:/a/b/c.avro", "summary": {"operation": "append", "foo": "bar"},
"schema-id": 3}"""
- snapshot = Snapshot.parse_raw(payload)
+ payload =
"""{"snapshot-id":25,"parent-snapshot-id":19,"sequence-number":200,"timestamp-ms":1602638573590,"manifest-list":"s3:/a/b/c.avro","summary":{"operation":"append","foo":"bar"},"schema-id":3}"""
+ snapshot = Snapshot.model_validate_json(payload)
assert snapshot == snapshot_with_properties
diff --git a/python/tests/table/test_sorting.py
b/python/tests/table/test_sorting.py
index 45c1ff2404..6b41193631 100644
--- a/python/tests/table/test_sorting.py
+++ b/python/tests/table/test_sorting.py
@@ -42,18 +42,18 @@ def sort_order() -> SortOrder:
def test_serialize_sort_order_unsorted() -> None:
- assert UNSORTED_SORT_ORDER.json() == '{"order-id": 0, "fields": []}'
+ assert UNSORTED_SORT_ORDER.model_dump_json() ==
'{"order-id":0,"fields":[]}'
def test_serialize_sort_order(sort_order: SortOrder) -> None:
- expected = '{"order-id": 22, "fields": [{"source-id": 19, "transform":
"identity", "direction": "asc", "null-order": "nulls-first"}, {"source-id": 25,
"transform": "bucket[4]", "direction": "desc", "null-order": "nulls-last"},
{"source-id": 22, "transform": "void", "direction": "asc", "null-order":
"nulls-first"}]}'
- assert sort_order.json() == expected
+ expected =
'{"order-id":22,"fields":[{"source-id":19,"transform":"identity","direction":"asc","null-order":"nulls-first"},{"source-id":25,"transform":"bucket[4]","direction":"desc","null-order":"nulls-last"},{"source-id":22,"transform":"void","direction":"asc","null-order":"nulls-first"}]}'
+ assert sort_order.model_dump_json() == expected
def test_deserialize_sort_order(sort_order: SortOrder) -> None:
payload = '{"order-id": 22, "fields": [{"source-id": 19, "transform":
"identity", "direction": "asc", "null-order": "nulls-first"}, {"source-id": 25,
"transform": "bucket[4]", "direction": "desc", "null-order": "nulls-last"},
{"source-id": 22, "transform": "void", "direction": "asc", "null-order":
"nulls-first"}]}'
- assert SortOrder.parse_raw(payload) == sort_order
+ assert SortOrder.model_validate_json(payload) == sort_order
def test_sorting_schema(example_table_metadata_v2: Dict[str, Any]) -> None:
diff --git a/python/tests/test_integration.py b/python/tests/test_integration.py
index 9a3e044e21..a63436bdae 100644
--- a/python/tests/test_integration.py
+++ b/python/tests/test_integration.py
@@ -229,7 +229,7 @@ def test_pyarrow_to_iceberg_all_types(table_test_all_types:
Table) -> None:
uri = urlparse(data_file_path)
with fs.open_input_file(f"{uri.netloc}{uri.path}") as fout:
parquet_schema = pq.read_schema(fout)
- stored_iceberg_schema =
Schema.parse_raw(parquet_schema.metadata.get(b"iceberg.schema"))
+ stored_iceberg_schema =
Schema.model_validate_json(parquet_schema.metadata.get(b"iceberg.schema"))
converted_iceberg_schema = pyarrow_to_schema(parquet_schema)
assert converted_iceberg_schema == stored_iceberg_schema
diff --git a/python/tests/test_schema.py b/python/tests/test_schema.py
index d3400b6266..57f1947346 100644
--- a/python/tests/test_schema.py
+++ b/python/tests/test_schema.py
@@ -86,7 +86,7 @@ def test_schema_str(table_schema_simple: Schema) -> None:
def test_schema_repr_single_field() -> None:
"""Test schema representation"""
- actual = repr(schema.Schema(NestedField(1, "foo", StringType()),
schema_id=1))
+ actual = repr(schema.Schema(NestedField(field_id=1, name="foo",
field_type=StringType()), schema_id=1))
expected = "Schema(NestedField(field_id=1, name='foo',
field_type=StringType(), required=True), schema_id=1, identifier_field_ids=[])"
assert expected == actual
@@ -94,7 +94,11 @@ def test_schema_repr_single_field() -> None:
def test_schema_repr_two_fields() -> None:
"""Test schema representation"""
actual = repr(
- schema.Schema(NestedField(1, "foo", StringType()), NestedField(2,
"bar", IntegerType(), required=False), schema_id=1)
+ schema.Schema(
+ NestedField(field_id=1, name="foo", field_type=StringType()),
+ NestedField(field_id=2, name="bar", field_type=IntegerType(),
required=False),
+ schema_id=1,
+ )
)
expected = "Schema(NestedField(field_id=1, name='foo',
field_type=StringType(), required=True), NestedField(field_id=2, name='bar',
field_type=IntegerType(), required=False), schema_id=1,
identifier_field_ids=[])"
assert expected == actual
@@ -486,13 +490,13 @@ def
test_build_position_accessors_with_struct(table_schema_nested: Schema) -> No
def test_serialize_schema(table_schema_simple: Schema) -> None:
- actual = table_schema_simple.json()
- expected = """{"type": "struct", "fields": [{"id": 1, "name": "foo",
"type": "string", "required": false}, {"id": 2, "name": "bar", "type": "int",
"required": true}, {"id": 3, "name": "baz", "type": "boolean", "required":
false}], "schema-id": 1, "identifier-field-ids": [2]}"""
+ actual = table_schema_simple.model_dump_json()
+ expected =
"""{"type":"struct","fields":[{"id":1,"name":"foo","type":"string","required":false},{"id":2,"name":"bar","type":"int","required":true},{"id":3,"name":"baz","type":"boolean","required":false}],"schema-id":1,"identifier-field-ids":[2]}"""
assert actual == expected
def test_deserialize_schema(table_schema_simple: Schema) -> None:
- actual = Schema.parse_raw(
+ actual = Schema.model_validate_json(
"""{"type": "struct", "fields": [{"id": 1, "name": "foo", "type":
"string", "required": false}, {"id": 2, "name": "bar", "type": "int",
"required": true}, {"id": 3, "name": "baz", "type": "boolean", "required":
false}], "schema-id": 1, "identifier-field-ids": [2]}"""
)
expected = table_schema_simple
diff --git a/python/tests/test_transforms.py b/python/tests/test_transforms.py
index 8d2fe19905..d8a2151752 100644
--- a/python/tests/test_transforms.py
+++ b/python/tests/test_transforms.py
@@ -22,6 +22,13 @@ from uuid import UUID
import mmh3 as mmh3
import pytest
+from pydantic import (
+ BeforeValidator,
+ PlainSerializer,
+ RootModel,
+ WithJsonSchema,
+)
+from typing_extensions import Annotated
from pyiceberg import transforms
from pyiceberg.expressions import (
@@ -65,8 +72,8 @@ from pyiceberg.transforms import (
UnknownTransform,
VoidTransform,
YearTransform,
+ parse_transform,
)
-from pyiceberg.typedef import IcebergBaseModel
from pyiceberg.types import (
BinaryType,
BooleanType,
@@ -193,7 +200,7 @@ def test_string_with_surrogate_pair() -> None:
(17501, DayTransform(), "2017-12-01"),
],
)
-def test_date_to_human_string(date_val: int, date_transform:
TimeTransform[Any], expected: str) -> None:
+def test_date_to_human_string(date_val: int, date_transform: Transform[Any,
Any], expected: str) -> None:
assert date_transform.to_human_string(DateType(), date_val) == expected
@@ -430,16 +437,21 @@ def test_void_transform() -> None:
assert void_transform.dedup_name == "void"
-class TestType(IcebergBaseModel):
- __root__: Transform[Any, Any]
+class FauxModel(RootModel):
+ root: Annotated[ # type: ignore
+ Transform,
+ BeforeValidator(parse_transform),
+ PlainSerializer(lambda c: str(c), return_type=str), # pylint:
disable=W0108
+ WithJsonSchema({"type": "string"}, mode="serialization"),
+ ]
def test_bucket_transform_serialize() -> None:
- assert BucketTransform(num_buckets=22).json() == '"bucket[22]"'
+ assert BucketTransform(num_buckets=22).model_dump_json() == '"bucket[22]"'
def test_bucket_transform_deserialize() -> None:
- transform = TestType.parse_raw('"bucket[22]"').__root__
+ transform = FauxModel.model_validate_json('"bucket[22]"').root
assert transform == BucketTransform(num_buckets=22)
@@ -452,11 +464,11 @@ def test_bucket_transform_repr() -> None:
def test_truncate_transform_serialize() -> None:
- assert UnknownTransform("unknown").json() == '"unknown"'
+ assert UnknownTransform("unknown").model_dump_json() == '"unknown"'
def test_unknown_transform_deserialize() -> None:
- transform = TestType.parse_raw('"unknown"').__root__
+ transform = FauxModel.model_validate_json('"unknown"').root
assert transform == UnknownTransform("unknown")
@@ -469,11 +481,11 @@ def test_unknown_transform_repr() -> None:
def test_void_transform_serialize() -> None:
- assert VoidTransform().json() == '"void"'
+ assert VoidTransform().model_dump_json() == '"void"'
def test_void_transform_deserialize() -> None:
- transform = TestType.parse_raw('"void"').__root__
+ transform = FauxModel.model_validate_json('"void"').root
assert transform == VoidTransform()
@@ -486,38 +498,38 @@ def test_void_transform_repr() -> None:
def test_year_transform_serialize() -> None:
- assert YearTransform().json() == '"year"'
+ assert YearTransform().model_dump_json() == '"year"'
def test_year_transform_deserialize() -> None:
- transform = TestType.parse_raw('"year"').__root__
+ transform = FauxModel.model_validate_json('"year"').root
assert transform == YearTransform()
def test_month_transform_serialize() -> None:
- assert MonthTransform().json() == '"month"'
+ assert MonthTransform().model_dump_json() == '"month"'
def test_month_transform_deserialize() -> None:
- transform = TestType.parse_raw('"month"').__root__
+ transform = FauxModel.model_validate_json('"month"').root
assert transform == MonthTransform()
def test_day_transform_serialize() -> None:
- assert DayTransform().json() == '"day"'
+ assert DayTransform().model_dump_json() == '"day"'
def test_day_transform_deserialize() -> None:
- transform = TestType.parse_raw('"day"').__root__
+ transform = FauxModel.model_validate_json('"day"').root
assert transform == DayTransform()
def test_hour_transform_serialize() -> None:
- assert HourTransform().json() == '"hour"'
+ assert HourTransform().model_dump_json() == '"hour"'
def test_hour_transform_deserialize() -> None:
- transform = TestType.parse_raw('"hour"').__root__
+ transform = FauxModel.model_validate_json('"hour"').root
assert transform == HourTransform()
diff --git a/python/tests/test_types.py b/python/tests/test_types.py
index dbe54688c7..249ee98a6f 100644
--- a/python/tests/test_types.py
+++ b/python/tests/test_types.py
@@ -19,9 +19,8 @@ import pickle
from typing import Type
import pytest
-from pydantic import ValidationError
-from pyiceberg.typedef import IcebergBaseModel
+from pyiceberg.exceptions import ValidationError
from pyiceberg.types import (
BinaryType,
BooleanType,
@@ -222,18 +221,12 @@ def test_non_parameterized_type_equality(
# Examples based on
https://iceberg.apache.org/spec/#appendix-c-json-serialization
-
-
-class IcebergTestType(IcebergBaseModel):
- __root__: IcebergType
-
-
def test_serialization_boolean() -> None:
- assert BooleanType().json() == '"boolean"'
+ assert BooleanType().model_dump_json() == '"boolean"'
def test_deserialization_boolean() -> None:
- assert IcebergTestType.parse_raw('"boolean"') == BooleanType()
+ assert BooleanType.model_validate_json('"boolean"') == BooleanType()
def test_str_boolean() -> None:
@@ -245,11 +238,11 @@ def test_repr_boolean() -> None:
def test_serialization_int() -> None:
- assert IntegerType().json() == '"int"'
+ assert IntegerType().model_dump_json() == '"int"'
def test_deserialization_int() -> None:
- assert IcebergTestType.parse_raw('"int"') == IntegerType()
+ assert IntegerType.model_validate_json('"int"') == IntegerType()
def test_str_int() -> None:
@@ -261,11 +254,11 @@ def test_repr_int() -> None:
def test_serialization_long() -> None:
- assert LongType().json() == '"long"'
+ assert LongType().model_dump_json() == '"long"'
def test_deserialization_long() -> None:
- assert IcebergTestType.parse_raw('"long"') == LongType()
+ assert LongType.model_validate_json('"long"') == LongType()
def test_str_long() -> None:
@@ -277,11 +270,11 @@ def test_repr_long() -> None:
def test_serialization_float() -> None:
- assert FloatType().json() == '"float"'
+ assert FloatType().model_dump_json() == '"float"'
def test_deserialization_float() -> None:
- assert IcebergTestType.parse_raw('"float"') == FloatType()
+ assert FloatType.model_validate_json('"float"') == FloatType()
def test_str_float() -> None:
@@ -293,11 +286,11 @@ def test_repr_float() -> None:
def test_serialization_double() -> None:
- assert DoubleType().json() == '"double"'
+ assert DoubleType().model_dump_json() == '"double"'
def test_deserialization_double() -> None:
- assert IcebergTestType.parse_raw('"double"') == DoubleType()
+ assert DoubleType.model_validate_json('"double"') == DoubleType()
def test_str_double() -> None:
@@ -309,11 +302,11 @@ def test_repr_double() -> None:
def test_serialization_date() -> None:
- assert DateType().json() == '"date"'
+ assert DateType().model_dump_json() == '"date"'
def test_deserialization_date() -> None:
- assert IcebergTestType.parse_raw('"date"') == DateType()
+ assert DateType.model_validate_json('"date"') == DateType()
def test_str_date() -> None:
@@ -325,11 +318,11 @@ def test_repr_date() -> None:
def test_serialization_time() -> None:
- assert TimeType().json() == '"time"'
+ assert TimeType().model_dump_json() == '"time"'
def test_deserialization_time() -> None:
- assert IcebergTestType.parse_raw('"time"') == TimeType()
+ assert TimeType.model_validate_json('"time"') == TimeType()
def test_str_time() -> None:
@@ -341,11 +334,11 @@ def test_repr_time() -> None:
def test_serialization_timestamp() -> None:
- assert TimestampType().json() == '"timestamp"'
+ assert TimestampType().model_dump_json() == '"timestamp"'
def test_deserialization_timestamp() -> None:
- assert IcebergTestType.parse_raw('"timestamp"') == TimestampType()
+ assert TimestampType.model_validate_json('"timestamp"') == TimestampType()
def test_str_timestamp() -> None:
@@ -357,11 +350,11 @@ def test_repr_timestamp() -> None:
def test_serialization_timestamptz() -> None:
- assert TimestamptzType().json() == '"timestamptz"'
+ assert TimestamptzType().model_dump_json() == '"timestamptz"'
def test_deserialization_timestamptz() -> None:
- assert IcebergTestType.parse_raw('"timestamptz"') == TimestamptzType()
+ assert TimestamptzType.model_validate_json('"timestamptz"') ==
TimestamptzType()
def test_str_timestamptz() -> None:
@@ -373,11 +366,11 @@ def test_repr_timestamptz() -> None:
def test_serialization_string() -> None:
- assert StringType().json() == '"string"'
+ assert StringType().model_dump_json() == '"string"'
def test_deserialization_string() -> None:
- assert IcebergTestType.parse_raw('"string"') == StringType()
+ assert StringType.model_validate_json('"string"') == StringType()
def test_str_string() -> None:
@@ -389,11 +382,11 @@ def test_repr_string() -> None:
def test_serialization_uuid() -> None:
- assert UUIDType().json() == '"uuid"'
+ assert UUIDType().model_dump_json() == '"uuid"'
def test_deserialization_uuid() -> None:
- assert IcebergTestType.parse_raw('"uuid"') == UUIDType()
+ assert UUIDType.model_validate_json('"uuid"') == UUIDType()
def test_str_uuid() -> None:
@@ -405,21 +398,18 @@ def test_repr_uuid() -> None:
def test_serialization_fixed() -> None:
- assert FixedType(22).json() == '"fixed[22]"'
+ assert FixedType(22).model_dump_json() == '"fixed[22]"'
def test_deserialization_fixed() -> None:
- fixed = IcebergTestType.parse_raw('"fixed[22]"')
+ fixed = FixedType.model_validate_json('"fixed[22]"')
assert fixed == FixedType(22)
-
- inner = fixed.__root__
- assert isinstance(inner, FixedType)
- assert len(inner) == 22
+ assert len(fixed) == 22
def test_deserialization_fixed_failure() -> None:
with pytest.raises(ValidationError) as exc_info:
- _ = IcebergTestType.parse_raw('"fixed[abc]"')
+ _ = FixedType.model_validate_json('"fixed[abc]"')
assert "Could not match fixed[abc], expected format fixed[22]" in
str(exc_info.value)
@@ -433,11 +423,11 @@ def test_repr_fixed() -> None:
def test_serialization_binary() -> None:
- assert BinaryType().json() == '"binary"'
+ assert BinaryType().model_dump_json() == '"binary"'
def test_deserialization_binary() -> None:
- assert IcebergTestType.parse_raw('"binary"') == BinaryType()
+ assert BinaryType.model_validate_json('"binary"') == BinaryType()
def test_str_binary() -> None:
@@ -449,22 +439,19 @@ def test_repr_binary() -> None:
def test_serialization_decimal() -> None:
- assert DecimalType(19, 25).json() == '"decimal(19, 25)"'
+ assert DecimalType(19, 25).model_dump_json() == '"decimal(19, 25)"'
def test_deserialization_decimal() -> None:
- decimal = IcebergTestType.parse_raw('"decimal(19, 25)"')
+ decimal = DecimalType.model_validate_json('"decimal(19, 25)"')
assert decimal == DecimalType(19, 25)
-
- inner = decimal.__root__
- assert isinstance(inner, DecimalType)
- assert inner.precision == 19
- assert inner.scale == 25
+ assert decimal.precision == 19
+ assert decimal.scale == 25
def test_deserialization_decimal_failure() -> None:
with pytest.raises(ValidationError) as exc_info:
- _ = IcebergTestType.parse_raw('"decimal(abc, def)"')
+ _ = DecimalType.model_validate_json('"decimal(abc, def)"')
    assert "Could not parse decimal(abc, def) into a DecimalType" in str(exc_info.value)
@@ -478,14 +465,14 @@ def test_repr_decimal() -> None:
def test_serialization_nestedfield() -> None:
- expected = '{"id": 1, "name": "required_field", "type": "string", "required": true, "doc": "this is a doc"}'
- actual = NestedField(1, "required_field", StringType(), True, "this is a doc").json()
+ expected = '{"id":1,"name":"required_field","type":"string","required":true,"doc":"this is a doc"}'
+ actual = NestedField(1, "required_field", StringType(), True, "this is a doc").model_dump_json()
assert expected == actual
def test_serialization_nestedfield_no_doc() -> None:
- expected = '{"id": 1, "name": "required_field", "type": "string", "required": true}'
- actual = NestedField(1, "required_field", StringType(), True).json()
+ expected = '{"id":1,"name":"required_field","type":"string","required":true}'
+ actual = NestedField(1, "required_field", StringType(), True).model_dump_json()
assert expected == actual
@@ -509,7 +496,7 @@ def test_nestedfield_by_alias() -> None:
def test_deserialization_nestedfield() -> None:
    expected = NestedField(1, "required_field", StringType(), True, "this is a doc")
- actual = NestedField.parse_raw(
+ actual = NestedField.model_validate_json(
        '{"id": 1, "name": "required_field", "type": "string", "required": true, "doc": "this is a doc"}'
)
assert expected == actual
@@ -517,27 +504,27 @@ def test_deserialization_nestedfield() -> None:
def test_deserialization_nestedfield_inner() -> None:
    expected = NestedField(1, "required_field", StringType(), True, "this is a doc")
- actual = IcebergTestType.parse_raw(
- '{"id": 1, "name": "required_field", "type": "string", "required": true, "doc": "this is a doc"}'
+ actual = NestedField.model_validate_json(
+ '{"id":1,"name":"required_field","type":"string","required":true,"doc":"this is a doc"}'
)
- assert expected == actual.__root__
+ assert expected == actual
def test_serialization_struct() -> None:
actual = StructType(
NestedField(1, "required_field", StringType(), True, "this is a doc"),
NestedField(2, "optional_field", IntegerType())
- ).json()
+ ).model_dump_json()
expected = (
- '{"type": "struct", "fields": ['
- '{"id": 1, "name": "required_field", "type": "string", "required": true, "doc": "this is a doc"}, '
- '{"id": 2, "name": "optional_field", "type": "int", "required": true}'
+ '{"type":"struct","fields":['
+ '{"id":1,"name":"required_field","type":"string","required":true,"doc":"this is a doc"},'
+ '{"id":2,"name":"optional_field","type":"int","required":true}'
"]}"
)
assert actual == expected
def test_deserialization_struct() -> None:
- actual = StructType.parse_raw(
+ actual = StructType.model_validate_json(
"""
{
"type": "struct",
@@ -578,13 +565,13 @@ def test_repr_struct(simple_struct: StructType) -> None:
def test_serialization_list(simple_list: ListType) -> None:
- actual = simple_list.json()
- expected = '{"type": "list", "element-id": 22, "element": "string", "element-required": true}'
+ actual = simple_list.model_dump_json()
+ expected = '{"type":"list","element-id":22,"element":"string","element-required":true}'
assert actual == expected
def test_deserialization_list(simple_list: ListType) -> None:
- actual = ListType.parse_raw('{"type": "list", "element-id": 22, "element": "string", "element-required": true}')
+ actual = ListType.model_validate_json('{"type": "list", "element-id": 22, "element": "string", "element-required": true}')
assert actual == simple_list
@@ -597,14 +584,14 @@ def test_repr_list(simple_list: ListType) -> None:
def test_serialization_map(simple_map: MapType) -> None:
- actual = simple_map.json()
- expected = """{"type": "map", "key-id": 19, "key": "string", "value-id": 25, "value": "double", "value-required": false}"""
+ actual = simple_map.model_dump_json()
+ expected = """{"type":"map","key-id":19,"key":"string","value-id":25,"value":"double","value-required":false}"""
assert actual == expected
def test_deserialization_map(simple_map: MapType) -> None:
- actual = MapType.parse_raw(
+ actual = MapType.model_validate_json(
        """{"type": "map", "key-id": 19, "key": "string", "value-id": 25, "value": "double", "value-required": false}"""
)
assert actual == simple_map