smaheshwar-pltr commented on code in PR #2230:
URL: https://github.com/apache/iceberg-python/pull/2230#discussion_r2222471562
##########
pyiceberg/table/__init__.py:
##########
@@ -1659,10 +1659,108 @@ def __init__(
self.row_filter = _parse_row_filter(row_filter)
self.selected_fields = selected_fields
self.case_sensitive = case_sensitive
- self.snapshot_id = snapshot_id
self.options = options
self.limit = limit
+ @abstractmethod
+ def projection(self) -> Schema: ...
+
+ @abstractmethod
+ def plan_files(self) -> Iterable[ScanTask]: ...
+
+ @abstractmethod
+ def to_arrow(self) -> pa.Table: ...
+
+ def select(self: A, *field_names: str) -> A:
+ if "*" in self.selected_fields:
+ return self.update(selected_fields=field_names)
+ return
self.update(selected_fields=tuple(set(self.selected_fields).intersection(set(field_names))))
+
+ def filter(self: A, expr: Union[str, BooleanExpression]) -> A:
+ return self.update(row_filter=And(self.row_filter,
_parse_row_filter(expr)))
+
+ def with_case_sensitive(self: A, case_sensitive: bool = True) -> A:
+ return self.update(case_sensitive=case_sensitive)
+
+ def update(self: A, **overrides: Any) -> A:
+ """Create a copy of this table scan with updated fields."""
+ from inspect import signature
+
+ # Extract those attributes that are constructor parameters. We don't
use self.__dict__ as the kwargs to the
+ # constructors because it may contain additional attributes that are
not part of the constructor signature.
+ params = signature(type(self).__init__).parameters.keys() - {"self"}
# Skip "self" parameter
+ kwargs = {param: getattr(self, param) for param in params} # Assume
parameters are attributes
+
+ return type(self)(**{**kwargs, **overrides})
+
+ def to_pandas(self, **kwargs: Any) -> pd.DataFrame:
Review Comment:
Highlighting this change. I've added default implementations based on
`to_arrow()` in this class, whereas previously these methods were
abstract on `TableScan`.
This technically changes the `TableScan` class in a user-facing way because
it now has default implementations, but this felt fine to me. Subclasses can
still override these methods if they wish.
The motivation here was to reduce duplication between `DataScan` and
`IncrementalAppendScan` - by introducing these default implementations in the
superclass, all table scans get those defaults for free.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]