This is an automated email from the ASF dual-hosted git repository.

XiaoHongbo-Hope pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git


The following commit(s) were added to refs/heads/master by this push:
     new 9571f14d26 [python] Fix null blob write (#7901)
9571f14d26 is described below

commit 9571f14d26b9ca002c646429b72c468f56679d59
Author: XiaoHongbo <[email protected]>
AuthorDate: Wed May 20 22:11:02 2026 +0800

    [python] Fix null blob write (#7901)
    
    ### Purpose
    
    PR #7847 added null support to `BlobFormatWriter.add_element` /
    `write_value` and `FormatBlobReader`, but the public `write_arrow` path
    (`DataBlobWriter` → `BlobWriter` → `BlobFileWriter._to_blob`) still
    rejects `None` and raises `ValueError`, so writing a batch with a NULL
    inline blob fails end-to-end:
    
    ```
    ValueError: Blob field value must be bytes/blob or serialized BlobDescriptor bytes, got <class 'NoneType'>.
    ```
    
    This PR makes `BlobFileWriter._to_blob` return `None` for `None` input.
    
    ### Tests
    
    - New e2e test `DataBlobWriterTest.test_null_blob`
---
 paimon-python/pypaimon/tests/blob_table_test.py    | 49 ++++++++++++++++++++++
 .../pypaimon/write/writer/blob_file_writer.py      |  8 +++-
 2 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/paimon-python/pypaimon/tests/blob_table_test.py b/paimon-python/pypaimon/tests/blob_table_test.py
index 56359c1d1f..95f89ff8de 100755
--- a/paimon-python/pypaimon/tests/blob_table_test.py
+++ b/paimon-python/pypaimon/tests/blob_table_test.py
@@ -1014,6 +1014,55 @@ class DataBlobWriterTest(unittest.TestCase):
         print(
             f"✅ End-to-end blob write/read test passed: wrote and read back 
{len(blob_data)} blob records correctly")  # noqa: E501
 
+    def test_null_blob(self):
+        from pypaimon import Schema
+
+        pa_schema = pa.schema([
+            ('id', pa.int32()),
+            ('name', pa.string()),
+            ('blob_data', pa.large_binary()),
+        ])
+
+        schema = Schema.from_pyarrow_schema(
+            pa_schema,
+            options={
+                'row-tracking.enabled': 'true',
+                'data-evolution.enabled': 'true'
+            }
+        )
+        self.catalog.create_table('test_db.blob_write_read_e2e_null', schema, 
False)
+        table = self.catalog.get_table('test_db.blob_write_read_e2e_null')
+
+        test_data = pa.Table.from_pydict({
+            'id':   [1, 2, 3, 4, 5],
+            'name': ['a', 'b', 'c', 'd', 'e'],
+            'blob_data': [
+                b'first_blob',
+                None,
+                b'third_blob',
+                None,
+                b'fifth_blob',
+            ],
+        }, schema=pa_schema)
+
+        write_builder = table.new_batch_write_builder()
+        writer = write_builder.new_write()
+        writer.write_arrow(test_data)
+
+        commit_messages = writer.prepare_commit()
+        write_builder.new_commit().commit(commit_messages)
+        writer.close()
+
+        read_builder = table.new_read_builder()
+        result = 
read_builder.new_read().to_arrow(read_builder.new_scan().plan().splits())
+
+        self.assertEqual(result.column('id').to_pylist(), [1, 2, 3, 4, 5])
+        self.assertEqual(result.column('name').to_pylist(), ['a', 'b', 'c', 
'd', 'e'])
+        self.assertEqual(
+            result.column('blob_data').to_pylist(),
+            [b'first_blob', None, b'third_blob', None, b'fifth_blob'],
+        )
+
     def test_blob_write_read_partition(self):
         """Test complete end-to-end blob functionality: write blob data and 
read it back to verify correctness."""
         from pypaimon import Schema
diff --git a/paimon-python/pypaimon/write/writer/blob_file_writer.py b/paimon-python/pypaimon/write/writer/blob_file_writer.py
index 2a7e9580e9..31d945a4ed 100644
--- a/paimon-python/pypaimon/write/writer/blob_file_writer.py
+++ b/paimon-python/pypaimon/write/writer/blob_file_writer.py
@@ -15,8 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-import pyarrow as pa
 from pathlib import Path
+from typing import Optional
+
+import pyarrow as pa
 
 from pypaimon.write.blob_format_writer import BlobFormatWriter
 from pypaimon.table.row.generic_row import GenericRow, RowKind
@@ -58,9 +60,11 @@ class BlobFileWriter:
         self.writer.add_element(row)
         self.row_count += 1
 
-    def _to_blob(self, col_data) -> Blob:
+    def _to_blob(self, col_data) -> Optional[Blob]:
         if hasattr(col_data, 'as_py'):
             col_data = col_data.as_py()
+        if col_data is None:
+            return None
         if isinstance(col_data, str):
             col_data = col_data.encode('utf-8')
         if isinstance(col_data, bytearray):

Reply via email to