This is an automated email from the ASF dual-hosted git repository.
rok pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new ef89dc459d GH-47966: [Python] PyArrow v22.0 assumes Pandas DataFrame
attrs are serializable (#47977)
ef89dc459d is described below
commit ef89dc459d836b881b49bf818f85497bb528e63c
Author: Bogdan Romenskii <[email protected]>
AuthorDate: Tue Oct 28 17:38:59 2025 +0100
GH-47966: [Python] PyArrow v22.0 assumes Pandas DataFrame attrs are
serializable (#47977)
### Rationale for this change
Please see #47966. #47147 assumed that pandas DataFrame attributes are always
JSON serializable, but they are not required to be.
### What changes are included in this PR?
Check whether serializing the pandas attributes raises an error (e.g.
`TypeError` or `OverflowError`). If it does, fall back to empty attributes and
emit a warning that the attributes were dropped.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No; this fixes a regression.
* GitHub Issue: #47966
Authored-by: Bogdan Romenskii <[email protected]>
Signed-off-by: Rok Mihevc <[email protected]>
---
python/pyarrow/pandas_compat.py | 9 +++++++++
python/pyarrow/tests/test_pandas.py | 16 ++++++++++++++++
2 files changed, 25 insertions(+)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 300a14a391..b0009507e4 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -277,6 +277,15 @@ def construct_metadata(columns_to_convert, df,
column_names, index_levels,
attributes = df.attrs if hasattr(df, "attrs") else {}
+ try:
+ json.dumps(attributes)
+ except Exception as e:
+ attributes = {}
+ warnings.warn(
+ f"Could not serialize pd.DataFrame.attrs: {e},"
+ f" defaulting to empty attributes.",
+ UserWarning, stacklevel=4)
+
return {
b'pandas': json.dumps({
'index_columns': index_descriptors,
diff --git a/python/pyarrow/tests/test_pandas.py
b/python/pyarrow/tests/test_pandas.py
index ceea2527da..5a55e90fbb 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -5265,3 +5265,19 @@ def test_bytes_column_name_to_pandas():
def test_is_data_frame_race_condition():
# See https://github.com/apache/arrow/issues/39313
test_util.invoke_script('arrow_39313.py')
+
+
+def test_json_unserializable_pd_df_attrs():
+ df = pd.DataFrame({"x": [1, 2, 3]})
+
+ df.attrs["timestamp"] = datetime.fromisoformat("2025-10-28T14:20:42")
+
+ with pytest.warns(
+ UserWarning,
+ match="Could not serialize pd.DataFrame.attrs:",
+ ):
+ df_table = pa.table(df)
+
+ pd_metadata = json.loads(df_table.schema.metadata[b"pandas"])
+
+ assert not pd_metadata["attributes"]