JingsongLi commented on code in PR #6969:
URL: https://github.com/apache/paimon/pull/6969#discussion_r2668776798
##########
paimon-python/pypaimon/write/file_store_commit.py:
##########
@@ -155,50 +255,70 @@ def _try_commit(self, commit_kind, commit_entries,
commit_identifier):
else:
deleted_file_count += 1
delta_record_count -= entry.file.row_count
- self.manifest_file_manager.write(new_manifest_file, commit_entries)
- # TODO: implement noConflictsOrFail logic
- partition_columns = list(zip(*(entry.partition.values for entry in
commit_entries)))
- partition_min_stats = [min(col) for col in partition_columns]
- partition_max_stats = [max(col) for col in partition_columns]
- partition_null_counts = [sum(value == 0 for value in col) for col in
partition_columns]
- if not all(count == 0 for count in partition_null_counts):
- raise RuntimeError("Partition value should not be null")
- manifest_file_path =
f"{self.manifest_file_manager.manifest_path}/{new_manifest_file}"
- new_manifest_list = ManifestFileMeta(
- file_name=new_manifest_file,
- file_size=self.table.file_io.get_file_size(manifest_file_path),
- num_added_files=added_file_count,
- num_deleted_files=deleted_file_count,
- partition_stats=SimpleStats(
- min_values=GenericRow(
- values=partition_min_stats,
- fields=self.table.partition_keys_fields
- ),
- max_values=GenericRow(
- values=partition_max_stats,
- fields=self.table.partition_keys_fields
+
+ created_manifest_file = None
+ created_delta_manifest_list = None
+ created_base_manifest_list = None
+
+ try:
+ self.manifest_file_manager.write(new_manifest_file, commit_entries)
+ created_manifest_file = new_manifest_file
+
+ # TODO: implement noConflictsOrFail logic
+ partition_columns = list(zip(*(entry.partition.values for entry in
commit_entries)))
+ partition_min_stats = [min(col) for col in partition_columns]
+ partition_max_stats = [max(col) for col in partition_columns]
+ partition_null_counts = [sum(value == 0 for value in col) for col
in partition_columns]
+ if not all(count == 0 for count in partition_null_counts):
+ raise RuntimeError("Partition value should not be null")
+
+ manifest_file_path =
f"{self.manifest_file_manager.manifest_path}/{new_manifest_file}"
+ file_size = self.table.file_io.get_file_size(manifest_file_path)
+
+ new_manifest_list = ManifestFileMeta(
+ file_name=new_manifest_file,
+ file_size=file_size,
+ num_added_files=added_file_count,
+ num_deleted_files=deleted_file_count,
+ partition_stats=SimpleStats(
+ min_values=GenericRow(
+ values=partition_min_stats,
+ fields=self.table.partition_keys_fields
+ ),
+ max_values=GenericRow(
+ values=partition_max_stats,
+ fields=self.table.partition_keys_fields
+ ),
+ null_counts=partition_null_counts,
),
- null_counts=partition_null_counts,
- ),
- schema_id=self.table.table_schema.id,
- )
- self.manifest_list_manager.write(delta_manifest_list,
[new_manifest_list])
+ schema_id=self.table.table_schema.id,
+ )
- # process existing_manifest
- total_record_count = 0
- if latest_snapshot:
- existing_manifest_files =
self.manifest_list_manager.read_all(latest_snapshot)
- previous_record_count = latest_snapshot.total_record_count
- if previous_record_count:
- total_record_count += previous_record_count
- else:
- existing_manifest_files = []
- self.manifest_list_manager.write(base_manifest_list,
existing_manifest_files)
+ self.manifest_list_manager.write(delta_manifest_list,
[new_manifest_list])
+ created_delta_manifest_list = delta_manifest_list
+
+ # process existing_manifest
+ total_record_count = 0
+ if latest_snapshot:
+ existing_manifest_files =
self.manifest_list_manager.read_all(latest_snapshot)
+ previous_record_count = latest_snapshot.total_record_count
+ if previous_record_count:
+ total_record_count += previous_record_count
+ else:
+ existing_manifest_files = []
+
+ self.manifest_list_manager.write(base_manifest_list,
existing_manifest_files)
+ created_base_manifest_list = base_manifest_list
+
+ except Exception as e:
+ self._cleanup_preparation_failure(created_manifest_file,
created_delta_manifest_list,
+ created_base_manifest_list)
+ logger.warning(f"Thread {thread_id}: Exception occurs when
preparing snapshot: {e}", exc_info=True)
+ return RetryResult(latest_snapshot, [], e)
- # process snapshot
total_record_count += delta_record_count
snapshot_data = Snapshot(
- version=3,
+ version=1,
Review Comment:
Why change this to 1?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]