This is an automated email from the ASF dual-hosted git repository.

JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 1c8ba39b1d [python] Fix crash when updating all columns via update_by_arrow_with_row_id (#8043)
1c8ba39b1d is described below

commit 1c8ba39b1de559601ec64f48c692bac56941e635
Author: XiaoHongbo <[email protected]>
AuthorDate: Mon Jun 1 11:47:30 2026 +0800

    [python] Fix crash when updating all columns via update_by_arrow_with_row_id (#8043)
    
    Updating **all columns** of a row via `update_by_arrow_with_row_id`
    crashes with `ValueError: column_names cannot be empty`. This PR fixes
    it by resolving the update columns from the input data (every column
    except the row-id column) when no update columns have been set.
---
 paimon-python/pypaimon/tests/table_update_test.py | 39 +++++++++++++++++++++++
 paimon-python/pypaimon/write/table_update.py      |  6 +++-
 2 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/paimon-python/pypaimon/tests/table_update_test.py b/paimon-python/pypaimon/tests/table_update_test.py
index 53a84666d1..181cf7a11b 100644
--- a/paimon-python/pypaimon/tests/table_update_test.py
+++ b/paimon-python/pypaimon/tests/table_update_test.py
@@ -123,6 +123,45 @@ class _TableUpdateTestBase(DataEvolutionTestBase):
             result['city'].to_pylist(),
         )
 
+    def test_update_columns_fall_back_to_data_when_unset(self):
+        table = self._create_seeded_table()
+
+        self._do_update(table, pa.Table.from_pydict({
+            '_ROW_ID': [0, 1, 2, 3, 4],
+            'id': [1, 2, 3, 4, 5],
+            'name': ['A', 'B', 'C', 'D', 'E'],
+            'age': [1, 2, 3, 4, 5],
+            'city': ['c0', 'c1', 'c2', 'c3', 'c4'],
+        }), ['id', 'name', 'age', 'city'])
+        result = self._read_all(table)
+        self.assertEqual(['A', 'B', 'C', 'D', 'E'], result['name'].to_pylist())
+        self.assertEqual([1, 2, 3, 4, 5], result['age'].to_pylist())
+        self.assertEqual(['c0', 'c1', 'c2', 'c3', 'c4'], 
result['city'].to_pylist())
+
+        wb = self._make_write_builder(table)
+        tu = wb.new_update()
+        cid = self._next_commit_id()
+        msgs = self._apply_update(tu, pa.Table.from_pydict({
+            '_ROW_ID': [0, 1],
+            'age': [99, 98],
+        }), cid)
+        tc = wb.new_commit()
+        self._apply_commit(tc, msgs, cid)
+        tc.close()
+        result = self._read_all(table)
+        self.assertEqual([99, 98, 3, 4, 5], result['age'].to_pylist())
+        self.assertEqual(['A', 'B', 'C', 'D', 'E'], result['name'].to_pylist())
+
+    def test_update_with_only_row_id_raises(self):
+        table = self._create_seeded_table()
+        wb = self._make_write_builder(table)
+        tu = wb.new_update()
+        cid = self._next_commit_id()
+        with self.assertRaises(ValueError):
+            self._apply_update(tu, pa.Table.from_pydict({
+                '_ROW_ID': [0, 1],
+            }), cid)
+
     def test_partitioned_table_update(self):
         """Updates work on a partitioned table the same as a flat one."""
         table = self._create_table(partition_keys=['city'])
diff --git a/paimon-python/pypaimon/write/table_update.py b/paimon-python/pypaimon/write/table_update.py
index 0882bda8c6..4b063dfa7b 100644
--- a/paimon-python/pypaimon/write/table_update.py
+++ b/paimon-python/pypaimon/write/table_update.py
@@ -26,6 +26,7 @@ from pypaimon.globalindex import Range
 from pypaimon.manifest.schema.data_file_meta import DataFileMeta
 from pypaimon.read.split import DataSplit
 from pypaimon.snapshot.snapshot import BATCH_COMMIT_IDENTIFIER
+from pypaimon.table.special_fields import SpecialFields
 from pypaimon.write.commit_message import CommitMessage
 from pypaimon.write.table_update_by_row_id import TableUpdateByRowId
 from pypaimon.write.table_upsert_by_key import TableUpsertByKey
@@ -136,9 +137,12 @@ class TableUpdate:
     def _update_by_arrow_with_row_id(
             self, table: pa.Table, commit_identifier: int
     ) -> List[CommitMessage]:
+        cols = self.update_cols if self.update_cols is not None else [
+            c for c in table.column_names if c != SpecialFields.ROW_ID.name
+        ]
         return TableUpdateByRowId(
             self.table, self.commit_user, commit_identifier,
-        ).update_columns(table, self.update_cols)
+        ).update_columns(table, cols)
 
     def _upsert_by_arrow_with_key(
             self,

Reply via email to