BryanCutler commented on a change in pull request #24930: [SPARK-28132][PYTHON] Update document type conversion for Pandas UDFs (pyarrow 0.13.0, pandas 0.24.2, Python 3.7) URL: https://github.com/apache/spark/pull/24930#discussion_r296325451
##########
File path: python/pyspark/sql/functions.py
##########
@@ -3118,37 +3118,34 @@ def pandas_udf(f=None, returnType=None,
functionType=None):
# The following table shows most of Pandas data and SQL type conversions
in Pandas UDFs that
# are not yet visible to the user. Some of behaviors are buggy and might
be changed in the near
# future. The table might have to be eventually documented externally.
- # Please see SPARK-25798's PR to see the codes in order to generate the
table below.
+ # Please see SPARK-28132's PR to see the codes in order to generate the
table below.
#
- #
+-----------------------------+----------------------+----------+-------+--------+--------------------+--------------------+--------+---------+---------+---------+------------+------------+------------+-----------------------------------+-----------------------------------------------------+-----------------+--------------------+-----------------------------+-------------+-----------------+------------------+-----------+--------------------------------+
# noqa
- # |SQL Type \ Pandas
Value(Type)|None(object(NoneType))|True(bool)|1(int8)|1(int16)|
1(int32)|
1(int64)|1(uint8)|1(uint16)|1(uint32)|1(uint64)|1.0(float16)|1.0(float32)|1.0(float64)|1970-01-01
00:00:00(datetime64[ns])|1970-01-01 00:00:00-05:00(datetime64[ns,
US/Eastern])|a(object(string))| 1(object(Decimal))|[1 2
3](object(array[int32]))|1.0(float128)|(1+0j)(complex64)|(1+0j)(complex128)|A(category)|1
days 00:00:00(timedelta64[ns])| # noqa
- #
+-----------------------------+----------------------+----------+-------+--------+--------------------+--------------------+--------+---------+---------+---------+------------+------------+------------+-----------------------------------+-----------------------------------------------------+-----------------+--------------------+-----------------------------+-------------+-----------------+------------------+-----------+--------------------------------+
# noqa
- # | boolean| None| True|
True| True| True| True| True| True|
True| True| False| False| False|
False| False|
X| X| X| False|
False| False| X| False| # noqa
- # | tinyint| None| 1|
1| 1| 1| 1| X| X|
X| X| 1| 1| 1|
X| X| X|
X| X| X|
X| X| 0| X| # noqa
- # | smallint| None| 1|
1| 1| 1| 1| 1| X|
X| X| 1| 1| 1|
X| X| X|
X| X| X|
X| X| X| X| # noqa
- # | int| None| 1|
1| 1| 1| 1| 1| 1|
X| X| 1| 1| 1|
X| X| X|
X| X| X|
X| X| X| X| # noqa
- # | bigint| None| 1|
1| 1| 1| 1| 1| 1|
1| X| 1| 1| 1|
0| 18000000000000| X|
X| X| X|
X| X| X| X| # noqa
- # | float| None| 1.0|
1.0| 1.0| 1.0| 1.0| 1.0| 1.0|
1.0| 1.0| 1.0| 1.0| 1.0|
X| X|
X|1.401298464324817...| X| X|
X| X| X| X| # noqa
- # | double| None| 1.0|
1.0| 1.0| 1.0| 1.0| 1.0| 1.0|
1.0| 1.0| 1.0| 1.0| 1.0|
X| X|
X| X| X| X|
X| X| X| X| # noqa
- # | date| None| X|
X| X|datetime.date(197...| X| X| X|
X| X| X| X| X|
datetime.date(197...| X|
X| X| X| X|
X| X| X|
X| # noqa
- # | timestamp| None| X|
X| X| X|datetime.datetime...| X| X|
X| X| X| X| X|
datetime.datetime...| datetime.datetime...|
X| X| X| X|
X| X| X|
X| # noqa
- # | string| None|
u''|u'\x01'| u'\x01'| u'\x01'| u'\x01'| u'\x01'|
u'\x01'| u'\x01'| u'\x01'| u''| u''| u''|
X| X|
u'a'| X| X|
u''| u''| u''| X|
X| # noqa
- # | decimal(10,0)| None| X|
X| X| X| X| X| X|
X| X| X| X| X|
X| X| X|
Decimal('1')| X| X|
X| X| X| X| # noqa
- # | array<int>| None| X|
X| X| X| X| X| X|
X| X| X| X| X|
X| X| X|
X| [1, 2, 3]| X|
X| X| X| X| # noqa
- # | map<string,int>| X| X|
X| X| X| X| X| X|
X| X| X| X| X|
X| X| X|
X| X| X|
X| X| X| X| # noqa
- # | struct<_1:int>| X| X|
X| X| X| X| X| X|
X| X| X| X| X|
X| X| X|
X| X| X|
X| X| X| X| # noqa
- # | binary| X| X|
X| X| X| X| X| X|
X| X| X| X| X|
X| X| X|
X| X| X|
X| X| X| X| # noqa
- #
+-----------------------------+----------------------+----------+-------+--------+--------------------+--------------------+--------+---------+---------+---------+------------+------------+------------+-----------------------------------+-----------------------------------------------------+-----------------+--------------------+-----------------------------+-------------+-----------------+------------------+-----------+--------------------------------+
# noqa
+ #

# noqa
+ # |SQL Type \ Pandas Value(Type)|None(object(NoneType))|
True(bool)| 1(int8)| 1(int16)| 1(int32)|
1(int64)| 1(uint8)| 1(uint16)| 1(uint32)|
1(uint64)| 1.0(float16)| 1.0(float32)| 1.0(float64)|1970-01-01
00:00:00(datetime64[ns])|1970-01-01 00:00:00-05:00(datetime64[ns,
US/Eastern])|a(object(string))| 1(object(Decimal))|[1 2
3](object(array[int32]))|
1.0(float128)|(1+0j)(complex64)|(1+0j)(complex128)|A(category)|1 days
00:00:00(timedelta64[ns])| # noqa
+ #

# noqa
+ # | boolean| None|
True| True| True| True|
True| True| True| True|
True| True| True| True|
X| X| X|
X| X| X|
X| X| X| X| # noqa
+ # | tinyint| None|
1| 1| 1| 1|
1| 1| 1| 1| 1|
1| 1| 1| X|
X| X|
1| X| X| X|
X| 0| X| # noqa
+ # | smallint| None|
1| 1| 1| 1|
1| 1| 1| 1| 1|
1| 1| 1| X|
X| X|
1| X| X| X|
X| X| X| # noqa
+ # | int| None|
1| 1| 1| 1|
1| 1| 1| 1| 1|
1| 1| 1| X|
X| X|
1| X| X| X|
X| X| X| # noqa
+ # | bigint| None|
1| 1| 1| 1|
1| 1| 1| 1| 1|
1| 1| 1| 0|
18000000000000| X|
1| X| X| X|
X| X| X| # noqa
+ # | float| None|
1.0| 1.0| 1.0| 1.0|
1.0| 1.0| 1.0| 1.0|
1.0| 1.0| 1.0| 1.0|
X| X| X|
X| X| X|
X| X| X| X| # noqa
+ # | double| None|
1.0| 1.0| 1.0| 1.0|
1.0| 1.0| 1.0| 1.0|
1.0| 1.0| 1.0| 1.0|
X| X| X|
X| X| X|
X| X| X| X| # noqa
+ # | date| None|
X| X| X|datetime.date(197...|
X| X| X| X| X|
X| X| X| datetime.date(197...|
datetime.date(197...|
X|datetime.date(197...| X| X|
X| X| X| X| # noqa
+ # | timestamp| None|
X| X| X|
X|datetime.datetime...| X| X|
X| X| X| X| X|
datetime.datetime...| datetime.datetime...|
X|datetime.datetime...| X|
X| X| X| X|
X| # noqa
+ # | string| None|
''| ''| ''| '\x01'|
'\x01'| ''| ''| '\x01'|
'\x01'| ''| ''| ''|
X| X|
'a'| X| X| ''|
X| ''| X| X| # noqa
Review comment:
Yeah, that is a bit strange. Let me look into it and maybe file a JIRA.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
