JingsongLi commented on code in PR #6969:
URL: https://github.com/apache/paimon/pull/6969#discussion_r2668776798


##########
paimon-python/pypaimon/write/file_store_commit.py:
##########
@@ -155,50 +255,70 @@ def _try_commit(self, commit_kind, commit_entries, 
commit_identifier):
             else:
                 deleted_file_count += 1
                 delta_record_count -= entry.file.row_count
-        self.manifest_file_manager.write(new_manifest_file, commit_entries)
-        # TODO: implement noConflictsOrFail logic
-        partition_columns = list(zip(*(entry.partition.values for entry in 
commit_entries)))
-        partition_min_stats = [min(col) for col in partition_columns]
-        partition_max_stats = [max(col) for col in partition_columns]
-        partition_null_counts = [sum(value == 0 for value in col) for col in 
partition_columns]
-        if not all(count == 0 for count in partition_null_counts):
-            raise RuntimeError("Partition value should not be null")
-        manifest_file_path = 
f"{self.manifest_file_manager.manifest_path}/{new_manifest_file}"
-        new_manifest_list = ManifestFileMeta(
-            file_name=new_manifest_file,
-            file_size=self.table.file_io.get_file_size(manifest_file_path),
-            num_added_files=added_file_count,
-            num_deleted_files=deleted_file_count,
-            partition_stats=SimpleStats(
-                min_values=GenericRow(
-                    values=partition_min_stats,
-                    fields=self.table.partition_keys_fields
-                ),
-                max_values=GenericRow(
-                    values=partition_max_stats,
-                    fields=self.table.partition_keys_fields
+
+        created_manifest_file = None
+        created_delta_manifest_list = None
+        created_base_manifest_list = None
+
+        try:
+            self.manifest_file_manager.write(new_manifest_file, commit_entries)
+            created_manifest_file = new_manifest_file
+
+            # TODO: implement noConflictsOrFail logic
+            partition_columns = list(zip(*(entry.partition.values for entry in 
commit_entries)))
+            partition_min_stats = [min(col) for col in partition_columns]
+            partition_max_stats = [max(col) for col in partition_columns]
+            partition_null_counts = [sum(value == 0 for value in col) for col 
in partition_columns]
+            if not all(count == 0 for count in partition_null_counts):
+                raise RuntimeError("Partition value should not be null")
+
+            manifest_file_path = 
f"{self.manifest_file_manager.manifest_path}/{new_manifest_file}"
+            file_size = self.table.file_io.get_file_size(manifest_file_path)
+
+            new_manifest_list = ManifestFileMeta(
+                file_name=new_manifest_file,
+                file_size=file_size,
+                num_added_files=added_file_count,
+                num_deleted_files=deleted_file_count,
+                partition_stats=SimpleStats(
+                    min_values=GenericRow(
+                        values=partition_min_stats,
+                        fields=self.table.partition_keys_fields
+                    ),
+                    max_values=GenericRow(
+                        values=partition_max_stats,
+                        fields=self.table.partition_keys_fields
+                    ),
+                    null_counts=partition_null_counts,
                 ),
-                null_counts=partition_null_counts,
-            ),
-            schema_id=self.table.table_schema.id,
-        )
-        self.manifest_list_manager.write(delta_manifest_list, 
[new_manifest_list])
+                schema_id=self.table.table_schema.id,
+            )
 
-        # process existing_manifest
-        total_record_count = 0
-        if latest_snapshot:
-            existing_manifest_files = 
self.manifest_list_manager.read_all(latest_snapshot)
-            previous_record_count = latest_snapshot.total_record_count
-            if previous_record_count:
-                total_record_count += previous_record_count
-        else:
-            existing_manifest_files = []
-        self.manifest_list_manager.write(base_manifest_list, 
existing_manifest_files)
+            self.manifest_list_manager.write(delta_manifest_list, 
[new_manifest_list])
+            created_delta_manifest_list = delta_manifest_list
+
+            # process existing_manifest
+            total_record_count = 0
+            if latest_snapshot:
+                existing_manifest_files = 
self.manifest_list_manager.read_all(latest_snapshot)
+                previous_record_count = latest_snapshot.total_record_count
+                if previous_record_count:
+                    total_record_count += previous_record_count
+            else:
+                existing_manifest_files = []
+
+            self.manifest_list_manager.write(base_manifest_list, 
existing_manifest_files)
+            created_base_manifest_list = base_manifest_list
+
+        except Exception as e:
+            self._cleanup_preparation_failure(created_manifest_file, 
created_delta_manifest_list,
+                                              created_base_manifest_list)
+            logger.warning(f"Thread {thread_id}: Exception occurs when 
preparing snapshot: {e}", exc_info=True)
+            return RetryResult(latest_snapshot, [], e)
 
-        # process snapshot
         total_record_count += delta_record_count
         snapshot_data = Snapshot(
-            version=3,
+            version=1,

Review Comment:
   Why change this to 1?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to