JingsongLi commented on code in PR #6274:
URL: https://github.com/apache/paimon/pull/6274#discussion_r2370854701
##########
paimon-python/pypaimon/read/table_scan.py:
##########
@@ -92,40 +105,96 @@ def plan(self) -> Plan:
entry for entry in added_entries
if (tuple(entry.partition.values), entry.bucket,
entry.file.file_name) not in deleted_entries
]
-
if self.predicate:
file_entries = self._filter_by_predicate(file_entries)
-
- partitioned_split = defaultdict(list)
- for entry in file_entries:
- partitioned_split[(tuple(entry.partition.values), entry.bucket)].append(entry)
-
- splits = []
- for key, values in partitioned_split.items():
- if self.table.is_primary_key_table:
- splits += self._create_primary_key_splits(values)
- else:
- splits += self._create_append_only_splits(values)
-
- splits = self._apply_push_down_limit(splits)
-
- return Plan(file_entries, splits)
+ return file_entries
def with_shard(self, idx_of_this_subtask, number_of_para_subtasks) -> 'TableScan':
+ if idx_of_this_subtask >= number_of_para_subtasks:
+ raise Exception("idx_of_this_subtask must be less than number_of_para_subtasks")
self.idx_of_this_subtask = idx_of_this_subtask
self.number_of_para_subtasks = number_of_para_subtasks
return self
+ def _append_only_filter_by_shard(self, partitioned_split):
Review Comment:
Should the `partitioned_split` parameter be named `partitioned_files` instead?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]