Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19607#discussion_r153142413
  
    --- Diff: python/pyspark/sql/types.py ---
    @@ -1678,37 +1679,105 @@ def from_arrow_schema(arrow_schema):
              for field in arrow_schema])
     
     
    -def _check_dataframe_localize_timestamps(pdf):
    +def _old_pandas_exception_message(e):
    +    """ Create an error message for importing old Pandas.
         """
    -    Convert timezone aware timestamps to timezone-naive in local time
    +    msg = "note: Pandas (>=0.19.2) must be installed and available on 
calling Python process"
    +    return "%s\n%s" % (_exception_message(e), msg)
    +
    +
    +def _check_dataframe_localize_timestamps(pdf, timezone):
    +    """
    +    Convert timezone aware timestamps to timezone-naive in the specified 
timezone or local timezone
     
         :param pdf: pandas.DataFrame
    -    :return pandas.DataFrame where any timezone aware columns have be 
converted to tz-naive
    +    :param timezone: the timezone to convert. if None then use local 
timezone
    +    :return pandas.DataFrame where any timezone aware columns have been 
converted to tz-naive
         """
    -    from pandas.api.types import is_datetime64tz_dtype
    +    try:
    +        from pandas.api.types import is_datetime64tz_dtype
    +    except ImportError as e:
    +        raise ImportError(_old_pandas_exception_message(e))
    +    tz = timezone or 'tzlocal()'
         for column, series in pdf.iteritems():
             # TODO: handle nested timestamps, such as 
ArrayType(TimestampType())?
             if is_datetime64tz_dtype(series.dtype):
    -            pdf[column] = 
series.dt.tz_convert('tzlocal()').dt.tz_localize(None)
    +            pdf[column] = series.dt.tz_convert(tz).dt.tz_localize(None)
         return pdf
     
     
    -def _check_series_convert_timestamps_internal(s):
    +def _check_series_convert_timestamps_internal(s, timezone):
         """
    -    Convert a tz-naive timestamp in local tz to UTC normalized for Spark 
internal storage
    +    Convert a tz-naive timestamp in the specified timezone or local 
timezone to UTC normalized for
    +    Spark internal storage
    +
         :param s: a pandas.Series
    +    :param timezone: the timezone to convert. if None then use local 
timezone
         :return pandas.Series where if it is a timestamp, has been UTC 
normalized without a time zone
         """
    -    from pandas.api.types import is_datetime64_dtype, is_datetime64tz_dtype
    +    try:
    +        from pandas.api.types import is_datetime64_dtype, 
is_datetime64tz_dtype
    +    except ImportError as e:
    +        raise ImportError(_old_pandas_exception_message(e))
         # TODO: handle nested timestamps, such as ArrayType(TimestampType())?
         if is_datetime64_dtype(s.dtype):
    -        return s.dt.tz_localize('tzlocal()').dt.tz_convert('UTC')
    +        tz = timezone or 'tzlocal()'
    +        return s.dt.tz_localize(tz).dt.tz_convert('UTC')
         elif is_datetime64tz_dtype(s.dtype):
             return s.dt.tz_convert('UTC')
         else:
             return s
     
     
    +def _check_series_convert_timestamps_localize(s, fromTimezone, toTimezone):
    +    """
    +    Convert timestamp to timezone-naive in the specified timezone or local 
timezone
    +
    +    :param s: a pandas.Series
    +    :param fromTimezone: the timezone to convert from. if None then use 
local timezone
    +    :param toTimezone: the timezone to convert to. if None then use local 
timezone
    +    :return pandas.Series where if it is a timestamp, has been converted 
to tz-naive
    +    """
    +    try:
    +        import pandas as pd
    +        from pandas.api.types import is_datetime64tz_dtype, 
is_datetime64_dtype
    +    except ImportError as e:
    +        raise ImportError(_old_pandas_exception_message(e))
    +    fromTz = fromTimezone or 'tzlocal()'
    --- End diff ---
    
    Ditto.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to