This is an automated email from the ASF dual-hosted git repository.
lzljs3620320 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/paimon.git
The following commit(s) were added to refs/heads/master by this push:
new 7914ab0cf1 [python] Fix avro write timestamp without timezone wrongly (#7259)
7914ab0cf1 is described below
commit 7914ab0cf1e07d6fec5bf45d33c35cc18af9de2a
Author: umi <[email protected]>
AuthorDate: Wed Feb 11 21:56:38 2026 +0800
[python] Fix avro write timestamp without timezone wrongly (#7259)
---
paimon-python/pypaimon/filesystem/local_file_io.py | 9 ++++++++-
paimon-python/pypaimon/filesystem/pyarrow_file_io.py | 9 ++++++++-
paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py | 10 +++-------
3 files changed, 19 insertions(+), 9 deletions(-)
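
For context, the fix below normalizes timestamp values before records are handed to the avro writer: any naive datetime (no tzinfo) is tagged as UTC so it is encoded against the UTC epoch rather than the host's local time. A minimal standalone sketch of that generator pattern, assuming a columnar records_dict mapping column names to equal-length lists (the helper name and sample data are illustrative, not part of this commit):

    from datetime import datetime, timezone

    def normalize_naive_timestamps(records_dict):
        # Yield one record per row, tagging naive datetimes as UTC so the
        # avro writer converts them against the UTC epoch, not local time.
        num_rows = len(next(iter(records_dict.values())))
        for i in range(num_rows):
            record = {}
            for col, values in records_dict.items():
                value = values[i]
                if isinstance(value, datetime) and value.tzinfo is None:
                    value = value.replace(tzinfo=timezone.utc)
                record[col] = value
            yield record

    # Example: a naive timestamp becomes an aware UTC timestamp.
    rows = {"id": [1], "ts": [datetime(2026, 2, 11, 21, 56, 38)]}
    print(next(normalize_naive_timestamps(rows)))
    # {'id': 1, 'ts': datetime.datetime(2026, 2, 11, 21, 56, 38, tzinfo=datetime.timezone.utc)}
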
diff --git a/paimon-python/pypaimon/filesystem/local_file_io.py b/paimon-python/pypaimon/filesystem/local_file_io.py
index 183eb0ea13..ed2c7d8117 100644
--- a/paimon-python/pypaimon/filesystem/local_file_io.py
+++ b/paimon-python/pypaimon/filesystem/local_file_io.py
@@ -20,6 +20,7 @@ import os
import shutil
import threading
import uuid
+from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Optional
from urllib.parse import urlparse
@@ -337,7 +338,13 @@ class LocalFileIO(FileIO):
         def record_generator():
             num_rows = len(list(records_dict.values())[0])
             for i in range(num_rows):
-                yield {col: records_dict[col][i] for col in records_dict.keys()}
+                record = {}
+                for col in records_dict.keys():
+                    value = records_dict[col][i]
+                    if isinstance(value, datetime) and value.tzinfo is None:
+                        value = value.replace(tzinfo=timezone.utc)
+                    record[col] = value
+                yield record
         records = record_generator()
diff --git a/paimon-python/pypaimon/filesystem/pyarrow_file_io.py b/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
index 74cc26a208..a8cd004ea2 100644
--- a/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
+++ b/paimon-python/pypaimon/filesystem/pyarrow_file_io.py
@@ -19,6 +19,7 @@ import logging
import os
import subprocess
import uuid
+from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from urllib.parse import splitport, urlparse
@@ -399,7 +400,13 @@ class PyArrowFileIO(FileIO):
         def record_generator():
             num_rows = len(list(records_dict.values())[0])
             for i in range(num_rows):
-                yield {col: records_dict[col][i] for col in records_dict.keys()}
+                record = {}
+                for col in records_dict.keys():
+                    value = records_dict[col][i]
+                    if isinstance(value, datetime) and value.tzinfo is None:
+                        value = value.replace(tzinfo=timezone.utc)
+                    record[col] = value
+                yield record
         records = record_generator()
diff --git a/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py b/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
index ee3ba6971e..98ae212a68 100644
--- a/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
+++ b/paimon-python/pypaimon/tests/e2e/java_py_read_write_test.py
@@ -200,13 +200,9 @@ class JavaPyReadWriteTest(unittest.TestCase):
         read_builder = table.new_read_builder()
         table_scan = read_builder.new_scan()
         table_read = read_builder.new_read()
-        initial_result = table_read.to_pandas(table_scan.plan().splits())
-        print(f"Format: {file_format}, Result:\n{initial_result}")
-        self.assertEqual(len(initial_result), 6)
-        # Data order may vary due to partitioning/bucketing, so compare as sets
-        expected_names = {'Apple', 'Banana', 'Carrot', 'Broccoli', 'Chicken', 'Beef'}
-        actual_names = set(initial_result['name'].tolist())
-        self.assertEqual(actual_names, expected_names)
+        result = table_read.to_pandas(table_scan.plan().splits())
+        print(f"Format: {file_format}, Result:\n{result}")
+        self.assertEqual(initial_data.to_dict(), result.to_dict())
         from pypaimon.write.row_key_extractor import FixedBucketRowKeyExtractor
         expected_bucket_first_row = 2
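
Why the tzinfo matters (a sketch of the general timestamp-to-epoch conversion, not the exact code path of the avro library pypaimon uses): a naive datetime run through local-time APIs is shifted by the host's UTC offset, while a UTC-aware datetime converts deterministically, which is what made the old behavior host-dependent.

    import time
    from datetime import datetime, timezone

    naive = datetime(2026, 2, 11, 21, 56, 38)

    # Naive value interpreted as local time (what a writer may do without tzinfo):
    local_micros = int(time.mktime(naive.timetuple())) * 1_000_000 + naive.microsecond

    # Same value after the fix tags it as UTC:
    aware = naive.replace(tzinfo=timezone.utc)
    utc_micros = int(aware.timestamp() * 1_000_000)

    # On any host not running in UTC, the two differ by the local UTC offset.
    print((local_micros - utc_micros) / 3_600_000_000, "hours apart")
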