This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e2ac52d661 GH-44188: [Python] Fix pandas roundtrip with bytes column
names (#44171)
e2ac52d661 is described below
commit e2ac52d661d4cd6528c769060c35d4c17ec44cfc
Author: Piong1997 <[email protected]>
AuthorDate: Thu Feb 27 18:44:32 2025 +0800
GH-44188: [Python] Fix pandas roundtrip with bytes column names (#44171)
### Rationale for this change
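When the column dtype is `np.bytes_`, the level is handled by the
`np.bytes_` branch, but because the next check was a separate `if` instead of
an `elif`, execution still fell through to the final branch and ran
`level = level.astype(dtype)`.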
### Are these changes tested?
Yes
* GitHub Issue: #44188
Lead-authored-by: Piong1997 <[email protected]>
Co-authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
python/pyarrow/pandas_compat.py | 2 +-
python/pyarrow/tests/test_pandas.py | 7 +++++++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/python/pyarrow/pandas_compat.py b/python/pyarrow/pandas_compat.py
index 83f9fd4afa..4164ad2106 100644
--- a/python/pyarrow/pandas_compat.py
+++ b/python/pyarrow/pandas_compat.py
@@ -1163,7 +1163,7 @@ def _reconstruct_columns_from_metadata(columns,
column_indexes):
if dtype == np.bytes_:
level = level.map(encoder)
# ARROW-13756: if index is timezone aware DataTimeIndex
- if pandas_dtype == "datetimetz":
+ elif pandas_dtype == "datetimetz":
tz = pa.lib.string_to_tzinfo(
column_indexes[0]['metadata']['timezone'])
level = pd.to_datetime(level, utc=True).tz_convert(tz)
diff --git a/python/pyarrow/tests/test_pandas.py
b/python/pyarrow/tests/test_pandas.py
index 7c3ee5ff35..4ad04c9ad1 100644
--- a/python/pyarrow/tests/test_pandas.py
+++ b/python/pyarrow/tests/test_pandas.py
@@ -5255,6 +5255,13 @@ def test_nested_chunking_valid():
schema=schema)
+def test_bytes_column_name_to_pandas():
+ df = pd.DataFrame([[0.1, 0.2], [0.3, 0.4]], columns=[b'col1', b'col2'])
+ table = pa.Table.from_pandas(df)
+ assert table.column_names == ['col1', 'col2']
+ assert table.to_pandas().equals(df)
+
+
@pytest.mark.processes
def test_is_data_frame_race_condition():
# See https://github.com/apache/arrow/issues/39313