HonahX commented on code in PR #352:
URL: https://github.com/apache/iceberg-python/pull/352#discussion_r1475415226
##########
pyiceberg/schema.py:
##########
@@ -221,6 +227,11 @@ def find_type(self, name_or_id: Union[str, int], case_sensitive: bool = True) ->
def highest_field_id(self) -> int:
return max(self._lazy_id_to_name.keys(), default=0)
+ def name_mapping(self) -> NameMapping:
Review Comment:
Could this be a `@cached_property`, since a schema should be immutable?
##########
pyiceberg/table/__init__.py:
##########
@@ -133,6 +132,43 @@
_JAVA_LONG_MAX = 9223372036854775807
+def _check_schema(table_schema: Schema, other_schema: "pa.Schema") -> None:
+ from pyiceberg.io.pyarrow import _pyarrow_to_schema_without_ids, pyarrow_to_schema
+
+ name_mapping = table_schema.name_mapping()
+ try:
+ task_schema = pyarrow_to_schema(other_schema, name_mapping=name_mapping)
+ except ValueError as e:
+ names = itertools.chain(*[field.names for field in name_mapping])
+ other_schema = _pyarrow_to_schema_without_ids(other_schema)
+ other_names = itertools.chain(*[field.names for field in other_schema.name_mapping()])
+ additional_names = set(other_names) - set(names)
+ raise ValueError(
+ f"PyArrow table contains more columns: {', '.join(sorted(additional_names))}. Update the schema first (hint, use union_by_name)."
+ ) from e
+
+ if table_schema.as_struct() != task_schema.as_struct():
+ from rich.console import Console
+ from rich.table import Table as RichTable
+
+ console = Console(record=True)
+
+ rich_table = RichTable(show_header=True, header_style="bold")
+ rich_table.add_column("")
+ rich_table.add_column("Table field")
+ rich_table.add_column("Dataframe field")
+
+ for lhs in table_schema.fields:
+ try:
+ rhs = task_schema.find_field(lhs.field_id)
+ rich_table.add_row("✅" if lhs == rhs else "❌", str(lhs), str(rhs))
Review Comment:
These symbols look nice! However, I'm concerned about whether these emojis
will render correctly across all platforms. Could this be a potential issue, or
am I overthinking it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]