smaheshwar-pltr commented on code in PR #2232:
URL: https://github.com/apache/iceberg-python/pull/2232#discussion_r2223095477
##########
pyiceberg/table/__init__.py:
##########
@@ -1819,76 +1819,19 @@ def _match_deletes_to_data_file(data_entry:
ManifestEntry, positional_delete_ent
class DataScan(TableScan):
- def _build_partition_projection(self, spec_id: int) -> BooleanExpression:
- project = inclusive_projection(self.table_metadata.schema(),
self.table_metadata.specs()[spec_id], self.case_sensitive)
- return project(self.row_filter)
-
@cached_property
- def partition_filters(self) -> KeyDefaultDict[int, BooleanExpression]:
- return KeyDefaultDict(self._build_partition_projection)
-
- def _build_manifest_evaluator(self, spec_id: int) ->
Callable[[ManifestFile], bool]:
- spec = self.table_metadata.specs()[spec_id]
- return manifest_evaluator(spec, self.table_metadata.schema(),
self.partition_filters[spec_id], self.case_sensitive)
-
- def _build_partition_evaluator(self, spec_id: int) -> Callable[[DataFile],
bool]:
- spec = self.table_metadata.specs()[spec_id]
- partition_type = spec.partition_type(self.table_metadata.schema())
- partition_schema = Schema(*partition_type.fields)
- partition_expr = self.partition_filters[spec_id]
-
- # The lambda created here is run in multiple threads.
- # So we avoid creating _EvaluatorExpression methods bound to a single
- # shared instance across multiple threads.
- return lambda data_file: expression_evaluator(partition_schema,
partition_expr, self.case_sensitive)(data_file.partition)
-
- def _build_metrics_evaluator(self) -> Callable[[DataFile], bool]:
- schema = self.table_metadata.schema()
- include_empty_files =
strtobool(self.options.get("include_empty_files", "false"))
-
- # The lambda created here is run in multiple threads.
- # So we avoid creating _InclusiveMetricsEvaluator methods bound to a
single
- # shared instance across multiple threads.
- return lambda data_file: _InclusiveMetricsEvaluator(
- schema,
- self.row_filter,
- self.case_sensitive,
- include_empty_files,
- ).eval(data_file)
-
- def _build_residual_evaluator(self, spec_id: int) -> Callable[[DataFile],
ResidualEvaluator]:
- spec = self.table_metadata.specs()[spec_id]
-
- from pyiceberg.expressions.visitors import residual_evaluator_of
-
- # The lambda created here is run in multiple threads.
- # So we avoid creating _EvaluatorExpression methods bound to a single
- # shared instance across multiple threads.
- return lambda datafile: (
- residual_evaluator_of(
- spec=spec,
- expr=self.row_filter,
- case_sensitive=self.case_sensitive,
- schema=self.table_metadata.schema(),
- )
+ def _manifest_planner(self) -> ManifestGroupPlanner:
Review Comment:
This could also be a field on the class, set in the constructor. I kept the
diff smaller here, but I'm happy to change it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]