samredai commented on a change in pull request #3677:
URL: https://github.com/apache/iceberg/pull/3677#discussion_r765240638



##########
File path: python/src/iceberg/table/metadata.py
##########
@@ -0,0 +1,215 @@
+import codecs
+import json
+from typing import Any, Optional, Union
+
+import boto3
+from jsonschema import validate as validate_json
+from jsonschema.exceptions import ValidationError
+
+from iceberg.io.s3 import S3Url
+
+TABLE_METADATA_V1_SCHEMA = {
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "object",
+    "properties": {
+        "format-version": {"type": "string"},
+        "table-uuid": {"type": "string"},
+        "location": {"type": "string"},
+        "last-sequence-number": {"type": "integer"},
+        "last-updated-ms": {"type": "integer"},
+        "last-column-id": {"type": "integer"},
+        "schemas": {"type": "array", "items": {}},
+        "current-schema-id": {"type": "integer"},
+        "partition-spec": {"type": "array", "items": {}},
+        "partition-specs": {"type": "array", "items": {}},
+        "default-spec-id": {"type": "integer"},
+        "last-partition-id": {"type": "integer"},
+        "properties": {"type": "object"},
+        "current-snapshot-id": {"type": "number"},
+        "snapshots": {"type": "array", "items": {}},
+        "snapshot-log": {"type": "array", "items": {}},
+        "metadata-log": {"type": "array", "items": {}},
+        "sort-orders": {"type": "array", "items": {}},
+        "default-sort-order-id": {"type": "integer"},
+    },
+    "required": [
+        "format-version",
+        "table-uuid",
+        "location",
+        "last-sequence-number",
+        "last-updated-ms",
+        "last-column-id",
+        "schemas",
+        "current-schema-id",
+        "partition-spec",
+        "partition-specs",
+        "default-spec-id",
+        "last-partition-id",
+        "properties",
+        "current-snapshot-id",
+        "snapshots",
+        "snapshot-log",
+        "metadata-log",
+        "sort-orders",
+        "default-sort-order-id",
+    ],
+}
+TABLE_METADATA_V2_SCHEMA = {
+    "$schema": "http://json-schema.org/draft-04/schema#",
+    "type": "object",
+    "properties": {
+        "format-version": {"type": "string"},
+        "table-uuid": {"type": "string"},
+        "location": {"type": "string"},
+        "last-sequence-number": {"type": "integer"},
+        "last-updated-ms": {"type": "integer"},
+        "last-column-id": {"type": "integer"},
+        "schemas": {"type": "array", "items": {}},
+        "current-schema-id": {"type": "integer"},
+        "partition-spec": {"type": "array", "items": {}},
+        "partition-specs": {"type": "array", "items": {}},
+        "default-spec-id": {"type": "integer"},
+        "last-partition-id": {"type": "integer"},
+        "properties": {"type": "object"},
+        "current-snapshot-id": {"type": "number"},
+        "snapshots": {"type": "array", "items": {}},
+        "snapshot-log": {"type": "array", "items": {}},
+        "metadata-log": {"type": "array", "items": {}},
+        "sort-orders": {"type": "array", "items": {}},
+        "default-sort-order-id": {"type": "integer"},
+    },
+    "required": [
+        "format-version",
+        "table-uuid",
+        "location",
+        "last-sequence-number",
+        "last-updated-ms",
+        "last-column-id",
+        "schemas",
+        "current-schema-id",
+        "partition-spec",
+        "partition-specs",
+        "default-spec-id",
+        "last-partition-id",
+        "properties",
+        "current-snapshot-id",
+        "snapshots",
+        "snapshot-log",
+        "metadata-log",
+        "sort-orders",
+        "default-sort-order-id",
+    ],
+}
+
+
+class TableMetadata:
+    """Metadata for an Iceberg table as specified in the Apache Iceberg
+    spec (https://iceberg.apache.org/spec/#iceberg-table-spec)
+
+    Args:
+      metadata (dict): Table metadata dictionary representation
+      version (str|int): The metadata spec version (1 or 2)
+    """
+
+    def __init__(self, metadata: dict, version: Union[str, int]):
+        self._version = version
+        self._metadata = metadata
+        for name, value in metadata.items():
+            setattr(self, self._clean_attribute_name(name), self._wrap(value))
+
+    def _wrap(self, value: Any):
+        """A recursive function that drills into iterable values and returns
+        nested TableMetadata instances

Review comment:
   I've updated the PR to remove the wrapper approach and set the top-level
fields as class attributes. I've also added a `to_dict()` method that's used
by `validate()`, where the `TableMetadata` instance is serialized into a Python
dictionary and then passed into the jsonschema validate function. I've also
added freezing of instances, which punts on the question of how we perform
table metadata updates.
   
   I can follow up with a PR that uses the builder pattern, where a Python
`dict` is the mutable form. Building from an existing table metadata
instance would then start with calling `to_dict()`.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to