Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/18664#discussion_r147127081
  
    --- Diff: python/pyspark/sql/types.py ---
    @@ -1619,11 +1619,38 @@ def to_arrow_type(dt):
             arrow_type = pa.decimal(dt.precision, dt.scale)
         elif type(dt) == StringType:
             arrow_type = pa.string()
    +    elif type(dt) == DateType:
    +        arrow_type = pa.date32()
    +    elif type(dt) == TimestampType:
    +        # Timestamps should be in UTC, JVM Arrow timestamps require a timezone to be read
    +        arrow_type = pa.timestamp('us', tz='UTC')
         else:
         raise TypeError("Unsupported type in conversion to Arrow: " + str(dt))
         return arrow_type
     
     
    +def _check_dataframe_localize_timestamps(df):
    +    """ Convert timezone aware timestamps to timezone-naive in local time
    --- End diff --
    
    Let's add a comment that says the expected input is `pd.DataFrame`.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to