samredai commented on a change in pull request #3407: URL: https://github.com/apache/iceberg/pull/3407#discussion_r738876550
########## File path: python/src/iceberg/partition_spec.py ########## @@ -0,0 +1,289 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +from urllib import parse +from collections import defaultdict +from typing import List, Optional + +from iceberg.partition_field import PartitionField +from iceberg.schema import Schema +from iceberg.types import Type, NestedField +from iceberg.validation_exception import ValidationException + +PARTITION_DATA_ID_START = 1000 + + +class PartitionSpec(object): + fields_by_source_id: defaultdict[list] = None + field_list: List[PartitionField] = None + + def __init__( + self, + schema: Schema, + spec_id: int, + part_fields: List[PartitionField], + last_assigned_field_id: int, + ): + self._schema = schema + self._spec_id = spec_id + self._fields = [part_field for part_field in part_fields] + self._last_assigned_field_id = last_assigned_field_id + + @property + def schema(self) -> Schema: + return self._schema + + @property + def spec_id(self) -> int: + return self._spec_id + + @property + def fields(self) -> List[PartitionField]: + return self._fields + + @property + def last_assigned_field_id(self) -> int: + return self._last_assigned_field_id + + def is_partitioned(self): + return len(self.fields) < 1 
+ + def _generate_fields_by_source_id(self): + if self.fields_by_source_id is None: + fields_source_to_field_dict = defaultdict(list) + for field in self.fields: + fields_source_to_field_dict[field.source_id] = [field] + return fields_source_to_field_dict + return None + + def get_fields_by_source_id(self, field_id: int) -> List[PartitionField]: + return self._generate_fields_by_source_id().get(field_id, None) + + def __eq__(self, other): + if isinstance(other, PartitionSpec): + if self.spec_id != other.spec_id: + return False + return self.fields == other.fields + return False + + def __str__(self): + partition_spec_str = "[" + for field in self.fields: + partition_spec_str += "\n" + partition_spec_str += " " + field + if len(self.fields) > 0: + partition_spec_str += "\n" + partition_spec_str += "]" + return partition_spec_str + + def compatible_with(self, other): + if self.__eq__(other): + return True + + if len(self.fields) != len(other.fields): + return False + + index = 0 + for field in self.fields: + other_field: PartitionField = other.fields[index] + if ( + field.source_id != other_field.source_id + or field.name != other_field.name + ): + index += 1 + # TODO: Add transform check + return False + return True + + def partition_type(self): + struct_fields = [] + # TODO: Needs transform + pass + + def escape(self, input_str): + try: + return parse.urlencode(input_str, encoding="utf-8") + except TypeError as e: + raise e + + def partition_to_path(self): + # TODO: Needs transform + pass + + def _generate_unpartitioned_spec(self): Review comment: Sorry if I'm off here, but if this is just a helper for getting a commonly used instantiation of this class, shouldn't these just be default arguments to `__init__()` so a user gets this if they just do `p_spec = PartitionSpec()`? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
