Github user BryanCutler commented on a diff in the pull request:
https://github.com/apache/spark/pull/18664#discussion_r147207020
--- Diff: python/pyspark/sql/types.py ---
@@ -1619,11 +1619,38 @@ def to_arrow_type(dt):
         arrow_type = pa.decimal(dt.precision, dt.scale)
     elif type(dt) == StringType:
         arrow_type = pa.string()
+    elif type(dt) == DateType:
+        arrow_type = pa.date32()
+    elif type(dt) == TimestampType:
+        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
+        arrow_type = pa.timestamp('us', tz='UTC')
     else:
         raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
     return arrow_type
+
+
+def _check_dataframe_localize_timestamps(df):
+    """ Convert timezone aware timestamps to timezone-naive in local time
+    """
+    from pandas.api.types import is_datetime64tz_dtype
+    for column, series in df.iteritems():
+        # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+        if is_datetime64tz_dtype(series.dtype):
+            df[column] = series.dt.tz_convert('tzlocal()').dt.tz_localize(None)
+    return df
+
+
+def _check_series_convert_timestamps_internal(s):
+    """ Convert a tz-naive timestamp in local tz to UTC normalized for Spark internal storage
+    """
+    from pandas.api.types import is_datetime64_dtype
+    # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
+    if is_datetime64_dtype(s.dtype):
+        return s.dt.tz_localize('tzlocal()').dt.tz_convert('UTC')
+    else:
+        return s
--- End diff ---
Here is what I found: for the actual internal data it doesn't matter. Changing the timezone on a series is just a metadata operation, so the same data will be transferred back to Spark regardless:
```
In [101]: ts = pd.Timestamp(1, unit='D', tz='America/New_York')
In [102]: ts.value
Out[102]: 86400000000000
In [103]: ts.tz_convert('UTC').value
Out[103]: 86400000000000
```
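Roughly the same check at the index level (just a sketch, assuming only pandas): `tz_convert` leaves the underlying int64 epoch values untouched, it is the initial `tz_localize` that actually shifts the stored values.
```
import pandas as pd

# tz_convert only rewrites timezone metadata; the underlying int64
# epoch values (asi8) stay identical.
idx = pd.to_datetime([1, 2], unit='D').tz_localize('America/New_York')
print(idx.asi8)                    # UTC epoch nanoseconds
print(idx.tz_convert('UTC').asi8)  # same values

# By contrast, tz_localize above did change the stored values: the naive
# wall-clock times were reinterpreted as America/New_York and stored as
# their UTC epoch equivalents.
```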
However, to be consistent we should make sure the tz is UTC, so I'll add this along with a test.
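Something along these lines (only a sketch; it assumes `_check_series_convert_timestamps_internal` is importable from `pyspark.sql.types` as in the diff above, and the test name/placement is illustrative):
```
import pandas as pd
# pandas.util.testing in this pandas era; newer versions expose pandas.testing
from pandas.util.testing import assert_series_equal
from pyspark.sql.types import _check_series_convert_timestamps_internal

def test_timestamps_normalized_to_utc():
    # tz-naive series, which the helper interprets as local time
    s = pd.Series(pd.to_datetime([86400000000000, 172800000000000], unit='ns'))
    utc = _check_series_convert_timestamps_internal(s)
    # result should be tz-aware in UTC
    assert str(utc.dt.tz) == 'UTC'
    # converting back to local time and dropping the tz should round-trip
    roundtrip = utc.dt.tz_convert('tzlocal()').dt.tz_localize(None)
    assert_series_equal(roundtrip, s)
```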