Repository: arrow Updated Branches: refs/heads/master 085c8754b -> c90ca60c1
ARROW-378: Python: Respect timezone on conversion of Pandas datetime columns arrow is now pandas datetime timezone aware Author: ahnj <[email protected]> Closes #287 from ahnj/timestamp-aware and squashes the following commits: 0221ed0 [ahnj] ARROW-378: Python: Respect timezone on conversion of Pandas datetime columns Project: http://git-wip-us.apache.org/repos/asf/arrow/repo Commit: http://git-wip-us.apache.org/repos/asf/arrow/commit/c90ca60c Tree: http://git-wip-us.apache.org/repos/asf/arrow/tree/c90ca60c Diff: http://git-wip-us.apache.org/repos/asf/arrow/diff/c90ca60c Branch: refs/heads/master Commit: c90ca60c1859b2b70c4f2dd3fb8c41b0f75f02d0 Parents: 085c875 Author: ahnj <[email protected]> Authored: Mon Jan 23 23:44:22 2017 -0500 Committer: Wes McKinney <[email protected]> Committed: Mon Jan 23 23:44:22 2017 -0500 ---------------------------------------------------------------------- python/pyarrow/array.pyx | 6 ++++- python/pyarrow/tests/test_convert_pandas.py | 29 ++++++++++++++++++++++-- 2 files changed, 32 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/arrow/blob/c90ca60c/python/pyarrow/array.pyx ---------------------------------------------------------------------- diff --git a/python/pyarrow/array.pyx b/python/pyarrow/array.pyx index 92206f2..c3a5a04 100644 --- a/python/pyarrow/array.pyx +++ b/python/pyarrow/array.pyx @@ -71,9 +71,13 @@ cdef class Array: timestamps_to_ms : bool, optional Convert datetime columns to ms resolution. This is needed for - compability with other functionality like Parquet I/O which + compatibility with other functionality like Parquet I/O which only supports milliseconds. + Notes + ----- + Localized timestamps will currently be returned as UTC (pandas's native representation). + Timezone-naive data will be implicitly interpreted as UTC. Examples -------- http://git-wip-us.apache.org/repos/asf/arrow/blob/c90ca60c/python/pyarrow/tests/test_convert_pandas.py ---------------------------------------------------------------------- diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py index 30705c4..674a436 100644 --- a/python/pyarrow/tests/test_convert_pandas.py +++ b/python/pyarrow/tests/test_convert_pandas.py @@ -63,7 +63,7 @@ class TestPandasConversion(unittest.TestCase): def _check_pandas_roundtrip(self, df, expected=None, nthreads=1, timestamps_to_ms=False, expected_schema=None, - schema=None): + check_dtype=True, schema=None): table = A.Table.from_pandas(df, timestamps_to_ms=timestamps_to_ms, schema=schema) result = table.to_pandas(nthreads=nthreads) @@ -71,7 +71,7 @@ class TestPandasConversion(unittest.TestCase): assert table.schema.equals(expected_schema) if expected is None: expected = df - tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected, check_dtype=check_dtype) def _check_array_roundtrip(self, values, expected=None, timestamps_to_ms=False, field=None): @@ -284,6 +284,31 @@ class TestPandasConversion(unittest.TestCase): self._check_pandas_roundtrip(df, timestamps_to_ms=False, expected_schema=schema) + def test_timestamps_with_timezone(self): + df = pd.DataFrame({ + 'datetime64': np.array([ + '2007-07-13T01:23:34.123', + '2006-01-13T12:34:56.432', + '2010-08-13T05:46:57.437'], + dtype='datetime64[ms]') + }) + df_est = df['datetime64'].dt.tz_localize('US/Eastern').to_frame() + df_utc = df_est['datetime64'].dt.tz_convert('UTC').to_frame() + self._check_pandas_roundtrip(df_est, expected=df_utc, timestamps_to_ms=True, check_dtype=False) + + # drop-in a null and ns instead of ms + df = pd.DataFrame({ + 'datetime64': np.array([ + '2007-07-13T01:23:34.123456789', + None, + '2006-01-13T12:34:56.432539784', + '2010-08-13T05:46:57.437699912'], + dtype='datetime64[ns]') + }) + df_est = df['datetime64'].dt.tz_localize('US/Eastern').to_frame() + df_utc = df_est['datetime64'].dt.tz_convert('UTC').to_frame() + self._check_pandas_roundtrip(df_est, expected=df_utc, timestamps_to_ms=False, check_dtype=False) + def test_date(self): df = pd.DataFrame({ 'date': [datetime.date(2000, 1, 1),
