This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 7914ab0cf1 [python] Fix avro write timestamp without timezone wrongly (#7259)
7914ab0cf1 is described below
commit 7914ab0cf1e07d6fec5bf45d33c35cc18af9de2a
Author: umi <[email protected]>
AuthorDate: Wed Feb 11 21:56:38 2026 +0800
[python] Fix avro write timestamp without timezone wrongly (#7259)
---
paimon-python/pypaimon/filesystem/local_file_io.py | 9 ++++++++-
paimon-python/pypaimon/filesystem/pyarrow_file_io.py | 9 ++++++++-
paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py | 10 +++-------
3 files changed, 19 insertions(+), 9 deletions(-)
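
For context, the fix below normalizes timestamp values before records are handed to the avro writer: any naive datetime (no tzinfo) is tagged as UTC so it is encoded against the UTC epoch rather than the host's local time. A minimal standalone sketch of that generator pattern, assuming a columnar records_dict mapping column names to equal-length lists (the helper name and sample data are illustrative, not part of this commit):

    from datetime import datetime, timezone

    def normalize_naive_timestamps(records_dict):
        # Yield one record per row, tagging naive datetimes as UTC so the
        # avro writer converts them against the UTC epoch, not local time.
        num_rows = len(next(iter(records_dict.values())))
        for i in range(num_rows):
            record = {}
            for col, values in records_dict.items():
                value = values[i]
                if isinstance(value, datetime) and value.tzinfo is None:
                    value = value.replace(tzinfo=timezone.utc)
                record[col] = value
            yield record

    # Example: a naive timestamp becomes an aware UTC timestamp.
    rows = {"id": [1], "ts": [datetime(2026, 2, 11, 21, 56, 38)]}
    print(next(normalize_naive_timestamps(rows)))
    # {'id': 1, 'ts': datetime.datetime(2026, 2, 11, 21, 56, 38, tzinfo=datetime.timezone.utc)}
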
diff --git a/paimon-python/pypaimon/filesystem/local_file_io.py b/paimon-python/pypaimon/filesystem/local_file_io.py
index 183eb0ea13..ed2c7d8117 100644
--- a/paimon-python/pypaimon/filesystem/local_file_io.py
+++ b/paimon-python/pypaimon/filesystem/local_file_io.py
@@ -20,6 +20,7 @@ import os
import shutil
import threading
import uuid
+from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional
from urllib.parse import urlparse
@@ -337,7 +338,13 @@ class LocalFileIO(FileIO):
         def record_generator():
             num_rows = len(list(records_dict.values())[0])
             for i in range(num_rows):
-                yield {col: records_dict[col][i] for col in records_dict.keys()}
+                record = {}
+                for col in records_dict.keys():
+                    value = records_dict[col][i]
+                    if isinstance(value, datetime) and value.tzinfo is None:
+                        value = value.replace(tzinfo=timezone.utc)
+                    record[col] = value
+                yield record
         records = record_generator()
diff --git a/paimon-python/pypaimon/filesystem/pyarrow_file_io.py b/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
index 74cc26a208..a8cd004ea2 100644
--- a/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
+++ b/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
@@ -19,6 +19,7 @@ import logging
import os
import subprocess
import uuid
+from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import splitport, urlparse
@@ -399,7 +400,13 @@ class PyArrowFileIO(FileIO):
         def record_generator():
             num_rows = len(list(records_dict.values())[0])
             for i in range(num_rows):
-                yield {col: records_dict[col][i] for col in records_dict.keys()}
+                record = {}
+                for col in records_dict.keys():
+                    value = records_dict[col][i]
+                    if isinstance(value, datetime) and value.tzinfo is None:
+                        value = value.replace(tzinfo=timezone.utc)
+                    record[col] = value
+                yield record
         records = record_generator()
diff --git a/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py b/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
index ee3ba6971e..98ae212a68 100644
--- a/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
+++ b/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
@@ -200,13 +200,9 @@ class JavaPyReadWriteTest(unittest.TestCase):
         read_builder = table.new_read_builder()
         table_scan = read_builder.new_scan()
         table_read = read_builder.new_read()
-        initial_result = table_read.to_pandas(table_scan.plan().splits())
-        print(f"Format: {file_format}, Result:\n{initial_result}")
-        self.assertEqual(len(initial_result), 6)
-        # Data order may vary due to partitioning/bucketing, so compare as sets
-        expected_names = {'Apple', 'Banana', 'Carrot', 'Broccoli', 'Chicken', 'Beef'}
-        actual_names = set(initial_result['name'].tolist())
-        self.assertEqual(actual_names, expected_names)
+        result = table_read.to_pandas(table_scan.plan().splits())
+        print(f"Format: {file_format}, Result:\n{result}")
+        self.assertEqual(initial_data.to_dict(), result.to_dict())
         from pypaimon.write.row_key_extractor import FixedBucketRowKeyExtractor
         expected_bucket_first_row = 2
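
Why the tzinfo matters (a sketch of the general timestamp-to-epoch conversion, not the exact code path of the avro library pypaimon uses): a naive datetime run through local-time APIs is shifted by the host's UTC offset, while a UTC-aware datetime converts deterministically, which is what made the old behavior host-dependent.

    import time
    from datetime import datetime, timezone

    naive = datetime(2026, 2, 11, 21, 56, 38)

    # Naive value interpreted as local time (what a writer may do without tzinfo):
    local_micros = int(time.mktime(naive.timetuple())) * 1_000_000 + naive.microsecond

    # Same value after the fix tags it as UTC:
    aware = naive.replace(tzinfo=timezone.utc)
    utc_micros = int(aware.timestamp() * 1_000_000)

    # On any host not running in UTC, the two differ by the local UTC offset.
    print((local_micros - utc_micros) / 3_600_000_000, "hours apart")
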