discivigour commented on code in PR #7032:
URL: https://github.com/apache/paimon/pull/7032#discussion_r2688854653
##########
paimon-python/pypaimon/read/scanner/full_starting_scanner.py:
##########
@@ -753,3 +522,263 @@ def _filter_blob(files: List[DataFileMeta]) -> List[DataFileMeta]:
result.append(file)
return result
+
+ def _without_delete_row(self, data_file_meta: DataFileMeta) -> bool:
+ # null to true to be compatible with old version
+ if data_file_meta.delete_row_count is None:
+ return True
+ return data_file_meta.delete_row_count == 0
+
+ def _partial_read(self):
+ return False
+
+ def _filter_by_pos(self, files):
+ pass
+
+ def _compute_split_pos(self, splits: List['Split']) -> None:
+ pass
+
+
+class PartialStartingScanner(FullStartingScanner):
+ def __init__(self, table, predicate: Optional[Predicate], limit: Optional[int]):
+ super().__init__(table, predicate, limit)
+ # for shard
+ self.idx_of_this_subtask = None
+ self.number_of_para_subtasks = None
+ self.start_pos_of_this_subtask = None
+ self.end_pos_of_this_subtask = None
+ self.plan_start_end_pos = None
+
+ def with_shard(self, idx_of_this_subtask, number_of_para_subtasks) -> 'FullStartingScanner':
+ if idx_of_this_subtask >= number_of_para_subtasks:
+ raise Exception("idx_of_this_subtask must be less than number_of_para_subtasks")
+ if self.start_pos_of_this_subtask is not None:
+ raise Exception("with_shard and with_slice cannot be used simultaneously")
Review Comment:
👌
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]