Github user jreback commented on a diff in the pull request:
https://github.com/apache/spark/pull/18664#discussion_r145654005
--- Diff: python/pyspark/sql/types.py ---
@@ -1619,11 +1619,39 @@ def to_arrow_type(dt):
arrow_type = pa.decimal(dt.precision, dt.scale)
elif type(dt) == StringType:
arrow_type = pa.string()
+ elif type(dt) == DateType:
+ arrow_type = pa.date32()
+ elif type(dt) == TimestampType:
+ # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
+ arrow_type = pa.timestamp('us', tz='UTC')
else:
raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
return arrow_type
+def _check_dataframe_localize_timestamps(df):
+ """ Convert timezone aware timestamps to timezone-naive in local time
+ """
+ from pandas.types.common import is_datetime64tz_dtype
+ for column, series in df.iteritems():
+ # TODO: handle nested timestamps?
+ if is_datetime64tz_dtype(series.dtype):
+ df[column] = series.dt.tz_convert('tzlocal()').dt.tz_localize(None)
--- End diff --
yep this is idiomatic
```
In [16]: s = Series(pd.date_range('20130101', periods=3, tz='UTC'))
In [17]: s
Out[17]:
0 2013-01-01 00:00:00+00:00
1 2013-01-02 00:00:00+00:00
2 2013-01-03 00:00:00+00:00
dtype: datetime64[ns, UTC]
In [18]: s.dt.tz_convert('tzlocal()')
Out[18]:
0 2012-12-31 19:00:00-05:00
1 2013-01-01 19:00:00-05:00
2 2013-01-02 19:00:00-05:00
dtype: datetime64[ns, tzlocal()]
In [19]: s.dt.tz_convert('tzlocal()').dt.tz_localize(None)
Out[19]:
0 2012-12-31 19:00:00
1 2013-01-01 19:00:00
2 2013-01-02 19:00:00
dtype: datetime64[ns]
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]