[ https://issues.apache.org/jira/browse/ARROW-1713?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16215267#comment-16215267 ]
ASF GitHub Bot commented on ARROW-1713:
---------------------------------------

Licht-T closed pull request #1239: ARROW-1713: [Python] Fix incorrect pd.Series.index serialization
URL: https://github.com/apache/arrow/pull/1239

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic):

diff --git a/python/pyarrow/serialization.py b/python/pyarrow/serialization.py
index eed6aae83..ffeb52bbb 100644
--- a/python/pyarrow/serialization.py
+++ b/python/pyarrow/serialization.py
@@ -102,7 +102,8 @@ def _deserialize_numpy_array(data):
     import pandas as pd
 
     def _serialize_pandas_series(obj):
-        return serialize_pandas(pd.DataFrame({obj.name: obj}))
+        return serialize_pandas(pd.DataFrame({obj.name: obj},
+                                             index=obj.index)).to_pybytes()
 
     def _deserialize_pandas_series(data):
         deserialized = deserialize_pandas(data)
diff --git a/python/pyarrow/tests/test_ipc.py b/python/pyarrow/tests/test_ipc.py
index 68c0c80aa..4a66416fb 100644
--- a/python/pyarrow/tests/test_ipc.py
+++ b/python/pyarrow/tests/test_ipc.py
@@ -433,15 +433,22 @@ def test_serialize_pandas_no_preserve_index():
 
 def test_serialize_with_pandas_objects():
     df = pd.DataFrame({'a': [1, 2, 3]}, index=[1, 2, 3])
+    s = pd.Series([1, 2, 3, 4])
+    s.index = pd.RangeIndex(start=0, stop=8, step=2)
+    # FIXME: No named Series name will be serialized u'None'.
+    s.name = 'discrete'
+
     data = {
         'a_series': df['a'],
-        'a_frame': df
+        'a_frame': df,
+        'discrete_idx_series': s
     }
 
     serialized = pa.serialize(data).to_buffer()
     deserialized = pa.deserialize(serialized)
     assert_frame_equal(deserialized['a_frame'], df)
     assert_series_equal(deserialized['a_series'], df['a'])
+    assert_series_equal(deserialized['discrete_idx_series'], s)
 
 
 def test_schema_batch_serialize_methods():

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

> [Python] Incorrect serialization pd.Series.index
> ------------------------------------------------
>
> Key: ARROW-1713
> URL: https://issues.apache.org/jira/browse/ARROW-1713
> Project: Apache Arrow
> Issue Type: Bug
> Components: Python
> Affects Versions: 0.7.1
> Reporter: Licht Takeuchi
> Assignee: Licht Takeuchi
> Labels: pull-request-available
>
> Repro code.
> {code:java}
> import pandas as pd
> import pyarrow as pa
> s = pd.Series([1,2,3,4])
> s.index = pd.RangeIndex(start=0, stop=8, step=2)
> serialized = pa.serialize(s).to_buffer()
> pa.deserialize(serialized)
> {code}

--
This message was sent by Atlassian JIRA (v6.4.14#64029)