Github user BryanCutler commented on a diff in the pull request: https://github.com/apache/spark/pull/18664#discussion_r145485217 --- Diff: python/pyspark/sql/types.py --- @@ -1619,11 +1619,39 @@ def to_arrow_type(dt): arrow_type = pa.decimal(dt.precision, dt.scale) elif type(dt) == StringType: arrow_type = pa.string() + elif type(dt) == DateType: + arrow_type = pa.date32() + elif type(dt) == TimestampType: + # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read + arrow_type = pa.timestamp('us', tz='UTC') else: raise TypeError("Unsupported type in conversion to Arrow: " + str(dt)) return arrow_type +def _check_dataframe_localize_timestamps(df): + """ Convert timezone aware timestamps to timezone-naive in local time + """ + from pandas.types.common import is_datetime64tz_dtype --- End diff -- I did see another API for this in pandas 0.20+, but this is the only one I could find that is also available in 0.19.2, the version used for the Spark Jenkins tests.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org