This is an automated email from the ASF dual-hosted git repository.
JingsongLi pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 1c8ba39b1d [python] Fix crash when updating all columns via
update_by_arrow_with_row_id (#8043)
1c8ba39b1d is described below
commit 1c8ba39b1de559601ec64f48c692bac56941e635
Author: XiaoHongbo <[email protected]>
AuthorDate: Mon Jun 1 11:47:30 2026 +0800
[python] Fix crash when updating all columns via
update_by_arrow_with_row_id (#8043)
Updating **all columns** of a row via `update_by_arrow_with_row_id`
crashes with `ValueError: column_names cannot be empty`. This PR fixes
it by resolving the update columns from the input data (every column
except the row-ID column) when the update type is unset.
---
paimon-python/pypaimon/tests/table_update_test.py | 39 +++++++++++++++++++++++
paimon-python/pypaimon/write/table_update.py | 6 +++-
2 files changed, 44 insertions(+), 1 deletion(-)
diff --git a/paimon-python/pypaimon/tests/table_update_test.py b/paimon-python/pypaimon/tests/table_update_test.py
index 53a84666d1..181cf7a11b 100644
--- a/paimon-python/pypaimon/tests/table_update_test.py
+++ b/paimon-python/pypaimon/tests/table_update_test.py
@@ -123,6 +123,45 @@ class _TableUpdateTestBase(DataEvolutionTestBase):
result['city'].to_pylist(),
)
+ def test_update_columns_fall_back_to_data_when_unset(self):
+ table = self._create_seeded_table()
+
+ self._do_update(table, pa.Table.from_pydict({
+ '_ROW_ID': [0, 1, 2, 3, 4],
+ 'id': [1, 2, 3, 4, 5],
+ 'name': ['A', 'B', 'C', 'D', 'E'],
+ 'age': [1, 2, 3, 4, 5],
+ 'city': ['c0', 'c1', 'c2', 'c3', 'c4'],
+ }), ['id', 'name', 'age', 'city'])
+ result = self._read_all(table)
+ self.assertEqual(['A', 'B', 'C', 'D', 'E'], result['name'].to_pylist())
+ self.assertEqual([1, 2, 3, 4, 5], result['age'].to_pylist())
+ self.assertEqual(['c0', 'c1', 'c2', 'c3', 'c4'], result['city'].to_pylist())
+
+ wb = self._make_write_builder(table)
+ tu = wb.new_update()
+ cid = self._next_commit_id()
+ msgs = self._apply_update(tu, pa.Table.from_pydict({
+ '_ROW_ID': [0, 1],
+ 'age': [99, 98],
+ }), cid)
+ tc = wb.new_commit()
+ self._apply_commit(tc, msgs, cid)
+ tc.close()
+ result = self._read_all(table)
+ self.assertEqual([99, 98, 3, 4, 5], result['age'].to_pylist())
+ self.assertEqual(['A', 'B', 'C', 'D', 'E'], result['name'].to_pylist())
+
+ def test_update_with_only_row_id_raises(self):
+ table = self._create_seeded_table()
+ wb = self._make_write_builder(table)
+ tu = wb.new_update()
+ cid = self._next_commit_id()
+ with self.assertRaises(ValueError):
+ self._apply_update(tu, pa.Table.from_pydict({
+ '_ROW_ID': [0, 1],
+ }), cid)
+
def test_partitioned_table_update(self):
"""Updates work on a partitioned table the same as a flat one."""
table = self._create_table(partition_keys=['city'])
diff --git a/paimon-python/pypaimon/write/table_update.py b/paimon-python/pypaimon/write/table_update.py
index 0882bda8c6..4b063dfa7b 100644
--- a/paimon-python/pypaimon/write/table_update.py
+++ b/paimon-python/pypaimon/write/table_update.py
@@ -26,6 +26,7 @@ from pypaimon.globalindex import Range
from pypaimon.manifest.schema.data_file_meta import DataFileMeta
from pypaimon.read.split import DataSplit
from pypaimon.snapshot.snapshot import BATCH_COMMIT_IDENTIFIER
+from pypaimon.table.special_fields import SpecialFields
from pypaimon.write.commit_message import CommitMessage
from pypaimon.write.table_update_by_row_id import TableUpdateByRowId
from pypaimon.write.table_upsert_by_key import TableUpsertByKey
@@ -136,9 +137,12 @@ class TableUpdate:
def _update_by_arrow_with_row_id(
self, table: pa.Table, commit_identifier: int
) -> List[CommitMessage]:
+ cols = self.update_cols if self.update_cols is not None else [
+ c for c in table.column_names if c != SpecialFields.ROW_ID.name
+ ]
return TableUpdateByRowId(
self.table, self.commit_user, commit_identifier,
- ).update_columns(table, self.update_cols)
+ ).update_columns(table, cols)
def _upsert_by_arrow_with_key(
self,