leaves12138 commented on code in PR #6977:
URL: https://github.com/apache/paimon/pull/6977#discussion_r2671055245
##########
paimon-python/pypaimon/write/file_store_commit.py:
##########
@@ -242,103 +192,76 @@ def _try_commit_once(self, retry_result: Optional[RetryResult], commit_kind: str
else:
deleted_file_count += 1
delta_record_count -= entry.file.row_count
-
- try:
- self.manifest_file_manager.write(new_manifest_file, commit_entries)
-
- # TODO: implement noConflictsOrFail logic
- partition_columns = list(zip(*(entry.partition.values for entry in commit_entries)))
- partition_min_stats = [min(col) for col in partition_columns]
- partition_max_stats = [max(col) for col in partition_columns]
- partition_null_counts = [sum(value == 0 for value in col) for col in partition_columns]
- if not all(count == 0 for count in partition_null_counts):
- raise RuntimeError("Partition value should not be null")
-
- manifest_file_path = f"{self.manifest_file_manager.manifest_path}/{new_manifest_file}"
- file_size = self.table.file_io.get_file_size(manifest_file_path)
-
- new_manifest_file_meta = ManifestFileMeta(
- file_name=new_manifest_file,
- file_size=file_size,
- num_added_files=added_file_count,
- num_deleted_files=deleted_file_count,
- partition_stats=SimpleStats(
- min_values=GenericRow(
- values=partition_min_stats,
- fields=self.table.partition_keys_fields
- ),
- max_values=GenericRow(
- values=partition_max_stats,
- fields=self.table.partition_keys_fields
- ),
- null_counts=partition_null_counts,
+ self.manifest_file_manager.write(new_manifest_file, commit_entries)
+ # TODO: implement noConflictsOrFail logic
+ partition_columns = list(zip(*(entry.partition.values for entry in commit_entries)))
+ partition_min_stats = [min(col) for col in partition_columns]
+ partition_max_stats = [max(col) for col in partition_columns]
+ partition_null_counts = [sum(value == 0 for value in col) for col in partition_columns]
+ if not all(count == 0 for count in partition_null_counts):
+ raise RuntimeError("Partition value should not be null")
+ manifest_file_path = f"{self.manifest_file_manager.manifest_path}/{new_manifest_file}"
+ new_manifest_file_meta = ManifestFileMeta(
+ file_name=new_manifest_file,
+ file_size=self.table.file_io.get_file_size(manifest_file_path),
+ num_added_files=added_file_count,
+ num_deleted_files=deleted_file_count,
+ partition_stats=SimpleStats(
+ min_values=GenericRow(
+ values=partition_min_stats,
+ fields=self.table.partition_keys_fields
),
- schema_id=self.table.table_schema.id,
- )
+ max_values=GenericRow(
+ values=partition_max_stats,
+ fields=self.table.partition_keys_fields
+ ),
+ null_counts=partition_null_counts,
+ ),
+ schema_id=self.table.table_schema.id,
+ )
+ self.manifest_list_manager.write(delta_manifest_list, [new_manifest_file_meta])
+
+ # process existing_manifest
+ total_record_count = 0
+ if latest_snapshot:
+ existing_manifest_files = self.manifest_list_manager.read_all(latest_snapshot)
+ previous_record_count = latest_snapshot.total_record_count
+ if previous_record_count:
+ total_record_count += previous_record_count
+ else:
+ existing_manifest_files = []
+ self.manifest_list_manager.write(base_manifest_list, existing_manifest_files)
+
+ total_record_count += delta_record_count
+ snapshot_data = Snapshot(
+ version=3,
+ id=new_snapshot_id,
+ schema_id=self.table.table_schema.id,
+ base_manifest_list=base_manifest_list,
+ delta_manifest_list=delta_manifest_list,
+ total_record_count=total_record_count,
+ delta_record_count=delta_record_count,
+ commit_user=self.commit_user,
+ commit_identifier=commit_identifier,
+ commit_kind=commit_kind,
+ time_millis=int(time.time() * 1000),
+ next_row_id=next_row_id,
+ )
- self.manifest_list_manager.write(delta_manifest_list, [new_manifest_file_meta])
-
- # process existing_manifest
- total_record_count = 0
- if latest_snapshot:
- existing_manifest_files = self.manifest_list_manager.read_all(latest_snapshot)
- previous_record_count = latest_snapshot.total_record_count
- if previous_record_count:
- total_record_count += previous_record_count
- else:
- existing_manifest_files = []
-
- self.manifest_list_manager.write(base_manifest_list, existing_manifest_files)
- total_record_count += delta_record_count
- snapshot_data = Snapshot(
- version=3,
- id=new_snapshot_id,
- schema_id=self.table.table_schema.id,
- base_manifest_list=base_manifest_list,
- delta_manifest_list=delta_manifest_list,
- total_record_count=total_record_count,
- delta_record_count=delta_record_count,
- commit_user=self.commit_user,
- commit_identifier=commit_identifier,
- commit_kind=commit_kind,
- time_millis=int(time.time() * 1000),
- next_row_id=next_row_id,
- )
- # Generate partition statistics for the commit
- statistics = self._generate_partition_statistics(commit_entries)
- except Exception as e:
- self._cleanup_preparation_failure(new_manifest_file, delta_manifest_list,
Review Comment:
OK
##########
paimon-python/pypaimon/write/file_store_commit.py:
##########
@@ -156,58 +130,34 @@ def _try_commit(self, commit_kind, commit_entries, commit_identifier):
commit_entries = self._generate_overwrite_entries()
result = self._try_commit_once(
- retry_result=retry_result,
commit_kind=commit_kind,
commit_entries=commit_entries,
Review Comment:
OK
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]