zhengruifeng commented on code in PR #54084:
URL: https://github.com/apache/spark/pull/54084#discussion_r2757603919


##########
python/pyspark/sql/tests/coercion/golden_pandas_udf_input_type_coercion_base.csv:
##########
@@ -1,40 +1,40 @@
-       Test Case       Spark Type      Spark Value     Python Type     Python 
Value
-0      byte_values     tinyint [-128, 127, 0]  ['int8', 'int8', 'int8']        
[-128, 127, 0]
-1      byte_null       tinyint [None, 42]      ['Int8', 'Int8']        [None, 
42]
-2      short_values    smallint        [-32768, 32767, 0]      ['int16', 
'int16', 'int16']     [-32768, 32767, 0]
-3      short_null      smallint        [None, 123]     ['Int16', 'Int16']      
[None, 123]
-4      int_values      int     [-2147483648, 2147483647, 0]    ['int32', 
'int32', 'int32']     [-2147483648, 2147483647, 0]
-5      int_null        int     [None, 456]     ['Int32', 'Int32']      [None, 
456]
-6      long_values     bigint  [-9223372036854775808, 9223372036854775807, 0]  
['int64', 'int64', 'int64']     [-9223372036854775808, 9223372036854775807, 0]
-7      long_null       bigint  [None, 789]     ['Int64', 'Int64']      [None, 
789]
-8      float_values    float   [0.0, 1.0, 3.140000104904175]   ['float32', 
'float32', 'float32']       [0.0, 1.0, 3.140000104904175]
-9      float_null      float   [None, 3.140000104904175]       ['float32', 
'float32']  [None, 3.140000104904175]
-10     double_values   double  [0.0, 1.0, 0.3333333333333333]  ['float64', 
'float64', 'float64']       [0.0, 1.0, 0.3333333333333333]
-11     double_null     double  [None, 2.71]    ['float64', 'float64']  [None, 
2.71]
-12     decimal_values  decimal(3,2)    [Decimal('5.35'), Decimal('1.23')]      
['object', 'object']    [Decimal('5.35'), Decimal('1.23')]
-13     decimal_null    decimal(3,2)    [None, Decimal('9.99')] ['object', 
'object']    [None, Decimal('9.99')]
-14     string_values   string  ['abc', '', 'hello']    ['object', 'object', 
'object']  ['abc', '', 'hello']
-15     string_null     string  [None, 'test']  ['object', 'object']    [None, 
'test']
-16     binary_values   binary  [b'abc', b'', b'ABC']   ['object', 'object', 
'object']  [b'abc', b'', b'ABC']
-17     binary_null     binary  [None, b'test'] ['object', 'object']    [None, 
b'test']
-18     boolean_values  boolean [True, False]   ['bool', 'bool']        [True, 
False]
-19     boolean_null    boolean [None, True]    ['object', 'object']    [None, 
True]
-20     date_values     date    [datetime.date(2020, 2, 2), datetime.date(1970, 
1, 1)]  ['object', 'object']    [datetime.date(2020, 2, 2), datetime.date(1970, 
1, 1)]
-21     date_null       date    [None, datetime.date(2023, 1, 1)]       
['object', 'object']    [None, datetime.date(2023, 1, 1)]
-22     timestamp_values        timestamp       [datetime.datetime(2020, 2, 2, 
12, 15, 16, 123000)]     ['datetime64[ns]']      [datetime.datetime(2020, 2, 2, 
12, 15, 16, 123000)]
-23     timestamp_null  timestamp       [None, datetime.datetime(2023, 1, 1, 
12, 0)]    ['datetime64[ns]', 'datetime64[ns]']    [None, 
datetime.datetime(2023, 1, 1, 12, 0)]
-24     array_int_values        array<int>      [[1, 2, 3], [], [1, None, 3]]   
['object', 'object', 'object']  [[1, 2, 3], [], [1, None, 3]]
-25     array_int_null  array<int>      [None, [4, 5, 6]]       ['object', 
'object']    [None, [4, 5, 6]]
-26     map_str_int_values      map<string,int> [{'world': 2, 'hello': 1}, {}]  
['object', 'object']    [{'world': 2, 'hello': 1}, {}]
-27     map_str_int_null        map<string,int> [None, {'test': 123}]   
['object', 'object']    [None, {'test': 123}]
-28     struct_int_str_values   struct<a1:int,a2:string>        [Row(a1=1, 
a2='hello'), Row(a1=2, a2='world')]  ['DataFrame', 'DataFrame']      [Row(a1=1, 
a2='hello'), Row(a1=2, a2='world')]
-29     struct_int_str_null     struct<a1:int,a2:string>        [None, 
Row(a1=99, a2='test')]   ['DataFrame', 'DataFrame']      [Row(a1=None, 
a2=None), Row(a1=99, a2='test')]
-30     array_array_int array<array<int>>       [[[1, 2, 3]], [[1], [2, 3]]]    
['object', 'object']    [[[1, 2, 3]], [[1], [2, 3]]]
-31     array_map_str_int       array<map<string,int>>  [[{'world': 2, 'hello': 
1}], [{'a': 1}, {'b': 2}]]      ['object', 'object']    [[{'world': 2, 'hello': 
1}], [{'a': 1}, {'b': 2}]]
-32     array_struct_int_str    array<struct<a1:int,a2:string>> [[Row(a1=1, 
a2='hello')], [Row(a1=1, a2='hello'), Row(a1=2, a2='world')]]       ['object', 
'object']    [[Row(a1=1, a2='hello')], [Row(a1=1, a2='hello'), Row(a1=2, 
a2='world')]]
-33     map_int_array_int       map<int,array<int>>     [{1: [1, 2, 3]}, {1: 
[1], 2: [2, 3]}]   ['object', 'object']    [{1: [1, 2, 3]}, {1: [1], 2: [2, 3]}]
-34     map_int_map_str_int     map<int,map<string,int>>        [{1: {'world': 
2, 'hello': 1}}] ['object']      [{1: {'world': 2, 'hello': 1}}]
-35     map_int_struct_int_str  map<int,struct<a1:int,a2:string>>       [{1: 
Row(a1=1, a2='hello')}]    ['object']      [{1: Row(a1=1, a2='hello')}]
-36     struct_int_array_int    struct<a:int,b:array<int>>      [Row(a=1, b=[1, 
2, 3])] ['DataFrame']   [Row(a=1, b=[1, 2, 3])]
-37     struct_int_map_str_int  struct<a:int,b:map<string,int>> [Row(a=1, 
b={'world': 2, 'hello': 1})]  ['DataFrame']   [Row(a=1, b={'world': 2, 'hello': 
1})]
-38     struct_int_struct_int_str       
struct<a:int,b:struct<a1:int,a2:string>>        [Row(a=1, b=Row(a1=1, 
a2='hello'))]     ['DataFrame']   [Row(a=1, b=Row(a1=1, a2='hello'))]
+Source Value \ Target Type     Spark Type      Spark Value     Python Type     
Python Value
+byte_values    tinyint [-128, 127, 0]  int8    [-128, 127, 0]
+byte_null      tinyint [None, 42]      Int8    [None, 42]
+short_values   smallint        [-32768, 32767, 0]      int16   [-32768, 32767, 
0]
+short_null     smallint        [None, 123]     Int16   [None, 123]
+int_values     int     [-2147483648, 2147483647, 0]    int32   [-2147483648, 
2147483647, 0]
+int_null       int     [None, 456]     Int32   [None, 456]
+long_values    bigint  [-9223372036854775808, 9223372036854775807, 0]  int64   
[-9223372036854775808, 9223372036854775807, 0]
+long_null      bigint  [None, 789]     Int64   [None, 789]
+float_values   float   [0.0, 1.0, 3.140000104904175]   float32 [0.0, 1.0, 
3.140000104904175]
+float_null     float   [None, 3.140000104904175]       float32 [None, 
3.140000104904175]
+double_values  double  [0.0, 1.0, 0.3333333333333333]  float64 [0.0, 1.0, 
0.3333333333333333]
+double_null    double  [None, 2.71]    float64 [None, 2.71]
+decimal_values decimal(3,2)    [Decimal('5.35'), Decimal('1.23')]      Decimal 
[Decimal('5.35'), Decimal('1.23')]

Review Comment:
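   @Yicong-Huang is the change from `['object', 'object']` to `Decimal` in the `Python Type` column expected?
   
   I think this column should still show the dtypes of the pdf (e.g. `object`) here, rather than the Python class of the values?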



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to