This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 6ffc45a5bd38 [SPARK-55114][PYTHON][TESTS][FOLLOW-UP] Update the result
format to be more friendly to markdown
6ffc45a5bd38 is described below
commit 6ffc45a5bd387152d7d3a0e9e61eaadabb91de14
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Jan 29 20:45:23 2026 +0800
[SPARK-55114][PYTHON][TESTS][FOLLOW-UP] Update the result format to be more
friendly to markdown
### What changes were proposed in this pull request?
Avoid an item being rendered as a link `[xxx](xxx)`
### Why are the changes needed?
to make the golden file more friendly to markdown
### Does this PR introduce _any_ user-facing change?
No, test-only
before:
<img width="457" height="172" alt="image"
src="https://github.com/user-attachments/assets/b912a5ff-5afe-444f-ad32-c0700ea7b4e3"
/>
after:
<img width="532" height="166" alt="image"
src="https://github.com/user-attachments/assets/7fe75e0e-8fa8-49f7-9dac-a80b165f9e06"
/>
### How was this patch tested?
Manually checked.
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #54053 from zhengruifeng/golden_file_update.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
...den_python_udf_return_type_coercion_vanilla.csv | 6 ++--
...lden_python_udf_return_type_coercion_vanilla.md | 34 +++++++++++-----------
..._python_udf_return_type_coercion_with_arrow.csv | 4 +--
...n_python_udf_return_type_coercion_with_arrow.md | 34 +++++++++++-----------
..._return_type_coercion_with_arrow_and_pandas.csv | 4 +--
...f_return_type_coercion_with_arrow_and_pandas.md | 34 +++++++++++-----------
.../tests/coercion/test_pandas_udf_input_type.py | 9 +++---
.../tests/coercion/test_pandas_udf_return_type.py | 5 ++--
.../tests/coercion/test_python_udf_return_type.py | 9 +++---
9 files changed, 71 insertions(+), 68 deletions(-)
diff --git
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_vanilla.csv
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_vanilla.csv
index 9317421a7e84..2fa44bdc57ee 100644
---
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_vanilla.csv
+++
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_vanilla.csv
@@ -1,12 +1,12 @@
-SQL Type \ Pandas Value(Type) None(NoneType) True(bool) 1(int) a(str)
1970-01-01(date) 1970-01-01 00:00:00(datetime) 1.0(float)
array('i', [1])(array) [1](list) (1,)(tuple)
bytearray(b'ABC')(bytearray) 1(Decimal) {'a': 1}(dict)
Row(kwargs=1)(Row) Row(namedtuple=1)(Row)
+SQL Type \ Pandas Value(Type) None@NoneType True@bool 1@int a@str
1970-01-01@date 1970-01-01 00:00:00@datetime 1.0@float array('i',
[1])@array [1]@list (1,)@tuple bytearray(b'ABC')@bytearray
1@Decimal {'a': 1}@dict Row(kwargs=1)@Row Row(namedtuple=1)@Row
boolean None True None None None None None None
None None None None None X X
tinyint None None 1 None None None None None
None None None None None X X
smallint None None 1 None None None None None
None None None None None X X
int None None 1 None None None None None None
None None None None X X
bigint None None 1 None None None None None None
None None None None X X
-string None 'true' '1' 'a' 'java.util.GregorianCalendar[t
'java.util.GregorianCalendar[t '1.0' '[I@<hash>' '[1]'
'[Ljava.lang.Object;@<hash>' '[B@<hash>' '1' '{a=1}' X X
+string None 'true' '1' 'a'
'java.util.GregorianCalendar[time=?,areF
'java.util.GregorianCalendar[time=?,areF '1.0' '[I@<hash>' '[1]'
'[Ljava.lang.Object;@<hash>' '[B@<hash>' '1' '{a=1}' X X
date None X X X datetime.date(1970, 1, 1)
datetime.date(1970, 1, 1) X X X X X X
X X X
-timestamp None X X X X datetime.datetime(1970,
1, 1, X X X X X X X X X
+timestamp None X X X X datetime.datetime(1970,
1, 1, 0, 0) X X X X X X X X
X
float None None None None None None 1.0 None None
None None None None X X
double None None None None None None 1.0 None None
None None None None X X
array<int> None None None None None None None [1]
[1] [1] [65, 66, 67] None None X X
diff --git
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_vanilla.md
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_vanilla.md
index e1be697d3942..02f9274446d5 100644
---
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_vanilla.md
+++
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_vanilla.md
@@ -1,17 +1,17 @@
-| SQL Type \ Pandas Value(Type) | None(NoneType) | True(bool) | 1(int)
| a(str) | 1970-01-01(date) | 1970-01-01 00:00:00(datetime) |
1.0(float) | array('i', [1])(array) | [1](list) | (1,)(tuple)
| bytearray(b'ABC')(bytearray) | 1(Decimal) | {'a': 1}(dict) |
Row(kwargs=1)(Row) | Row(namedtuple=1)(Row) |
-|---------------------------------|------------------|--------------|----------|----------|--------------------------------|---------------------------------|--------------|--------------------------|-------------|------------------------------|--------------------------------|--------------|------------------|----------------------|--------------------------|
-| boolean | None | True | None
| None | None | None |
None | None | None | None
| None | None | None | X
| X |
-| tinyint | None | None | 1
| None | None | None |
None | None | None | None
| None | None | None | X
| X |
-| smallint | None | None | 1
| None | None | None |
None | None | None | None
| None | None | None | X
| X |
-| int | None | None | 1
| None | None | None |
None | None | None | None
| None | None | None | X
| X |
-| bigint | None | None | 1
| None | None | None |
None | None | None | None
| None | None | None | X
| X |
-| string | None | 'true' | '1'
| 'a' | 'java.util.GregorianCalendar[t | 'java.util.GregorianCalendar[t |
'1.0' | '[I@<hash>' | '[1]' |
'[Ljava.lang.Object;@<hash>' | '[B@<hash>' | '1' |
'{a=1}' | X | X |
-| date | None | X | X
| X | datetime.date(1970, 1, 1) | datetime.date(1970, 1, 1) |
X | X | X | X
| X | X | X | X
| X |
-| timestamp | None | X | X
| X | X | datetime.datetime(1970, 1, 1, |
X | X | X | X
| X | X | X | X
| X |
-| float | None | None | None
| None | None | None |
1.0 | None | None | None
| None | None | None | X
| X |
-| double | None | None | None
| None | None | None |
1.0 | None | None | None
| None | None | None | X
| X |
-| array<int> | None | None | None
| None | None | None |
None | [1] | [1] | [1]
| [65, 66, 67] | None | None | X
| X |
-| binary | None | None | None
| b'a' | None | None |
None | None | None | None
| b'ABC' | None | None | X
| X |
-| decimal(10,0) | None | None | None
| None | None | None |
None | None | None | None
| None | Decimal('1') | None | X
| X |
-| map<string,int> | None | None | None
| None | None | None |
None | None | None | None
| None | None | {'a': 1} | X
| X |
-| struct<_1:int> | None | X | X
| X | X | X |
X | X | Row(_1=1) | Row(_1=1)
| X | X | Row(_1=None) |
Row(_1=1) | Row(_1=1) |
\ No newline at end of file
+| SQL Type \ Pandas Value(Type) | None@NoneType | True@bool | 1@int |
a@str | 1970-01-01@date | 1970-01-01
00:00:00@datetime | 1.0@float | array('i', [1])@array |
[1]@list | (1,)@tuple | bytearray(b'ABC')@bytearray |
1@Decimal | {'a': 1}@dict | Row(kwargs=1)@Row | Row(namedtuple=1)@Row |
+|---------------------------------|-----------------|-------------|---------|---------|------------------------------------------|------------------------------------------|-------------|-------------------------|------------|------------------------------|-------------------------------|--------------|-----------------|---------------------|-------------------------|
+| boolean | None | True | None |
None | None | None
| None | None | None | None
| None | None | None
| X | X |
+| tinyint | None | None | 1 |
None | None | None
| None | None | None | None
| None | None | None
| X | X |
+| smallint | None | None | 1 |
None | None | None
| None | None | None | None
| None | None | None
| X | X |
+| int | None | None | 1 |
None | None | None
| None | None | None | None
| None | None | None
| X | X |
+| bigint | None | None | 1 |
None | None | None
| None | None | None | None
| None | None | None
| X | X |
+| string | None | 'true' | '1' |
'a' | 'java.util.GregorianCalendar[time=?,areF |
'java.util.GregorianCalendar[time=?,areF | '1.0' | '[I@<hash>'
| '[1]' | '[Ljava.lang.Object;@<hash>' | '[B@<hash>' |
'1' | '{a=1}' | X | X |
+| date | None | X | X |
X | datetime.date(1970, 1, 1) | datetime.date(1970, 1, 1)
| X | X | X | X
| X | X | X
| X | X |
+| timestamp | None | X | X |
X | X | datetime.datetime(1970, 1,
1, 0, 0) | X | X | X | X
| X | X | X
| X | X |
+| float | None | None | None |
None | None | None
| 1.0 | None | None | None
| None | None | None
| X | X |
+| double | None | None | None |
None | None | None
| 1.0 | None | None | None
| None | None | None
| X | X |
+| array<int> | None | None | None |
None | None | None
| None | [1] | [1] | [1]
| [65, 66, 67] | None | None
| X | X |
+| binary | None | None | None |
b'a' | None | None
| None | None | None | None
| b'ABC' | None | None
| X | X |
+| decimal(10,0) | None | None | None |
None | None | None
| None | None | None | None
| None | Decimal('1') | None
| X | X |
+| map<string,int> | None | None | None |
None | None | None
| None | None | None | None
| None | None | {'a': 1}
| X | X |
+| struct<_1:int> | None | X | X |
X | X | X
| X | X | Row(_1=1) | Row(_1=1)
| X | X | Row(_1=None)
| Row(_1=1) | Row(_1=1) |
\ No newline at end of file
diff --git
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow.csv
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow.csv
index 1d161e5f66bc..a91b9628872a 100644
---
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow.csv
+++
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow.csv
@@ -1,4 +1,4 @@
-SQL Type \ Pandas Value(Type) None(NoneType) True(bool) 1(int) a(str)
1970-01-01(date) 1970-01-01 00:00:00(datetime) 1.0(float)
array('i', [1])(array) [1](list) (1,)(tuple)
bytearray(b'ABC')(bytearray) 1(Decimal) {'a': 1}(dict)
Row(kwargs=1)(Row) Row(namedtuple=1)(Row)
+SQL Type \ Pandas Value(Type) None@NoneType True@bool 1@int a@str
1970-01-01@date 1970-01-01 00:00:00@datetime 1.0@float array('i',
[1])@array [1]@list (1,)@tuple bytearray(b'ABC')@bytearray
1@Decimal {'a': 1}@dict Row(kwargs=1)@Row Row(namedtuple=1)@Row
boolean None True True X X X True X
X X X X X X X
tinyint None X 1 X X X 1 X
X X X 1 X X X
smallint None X 1 X X X 1 X
X X X 1 X X X
@@ -6,7 +6,7 @@ int None X 1 X 0 X 1
X X X X 1 X X X
bigint None X 1 X X 0 1 X X
X X 1 X X X
string None 'true' '1' 'a' '1970-01-01' '1970-01-01 00:00:00'
'1.0' """array('i', [1])""" '[1]' '(1,)' """bytearray(b'ABC')""" '1'
"""{'a': 1}""" 'Row(kwargs=1)' 'Row(namedtuple=1)'
date None X datetime.date(1970, 1, 2) X
datetime.date(1970, 1, 1) datetime.date(1970, 1, 1)
datetime.date(1970, 1, 2) X X X X
datetime.date(1970, 1, 2) X X X
-timestamp None X X X X datetime.datetime(1970,
1, 1, X X X X X X X X X
+timestamp None X X X X datetime.datetime(1970,
1, 1, 0, 0) X X X X X X X X
X
float None 1.0 1.0 X X X 1.0 X X
X X 1.0 X X X
double None 1.0 1.0 X X X 1.0 X X
X X 1.0 X X X
array<int> None X X X X X X [1]
[1] [1] [65, 66, 67] X X [1] [1]
diff --git
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow.md
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow.md
index 3d609a94d636..ffb521be1409 100644
---
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow.md
+++
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow.md
@@ -1,17 +1,17 @@
-| SQL Type \ Pandas Value(Type) | None(NoneType) | True(bool) | 1(int)
| a(str) | 1970-01-01(date) | 1970-01-01
00:00:00(datetime) | 1.0(float) | array('i', [1])(array) |
[1](list) | (1,)(tuple) | bytearray(b'ABC')(bytearray) | 1(Decimal)
| {'a': 1}(dict) | Row(kwargs=1)(Row) | Row(namedtuple=1)(Row) |
-|---------------------------------|------------------|--------------|---------------------------|----------|---------------------------|---------------------------------|---------------------------|--------------------------|-------------|---------------|--------------------------------|---------------------------|------------------|----------------------|--------------------------|
-| boolean | None | True | True
| X | X | X
| True | X | X
| X | X | X |
X | X | X |
-| tinyint | None | X | 1
| X | X | X
| 1 | X | X
| X | X | 1 |
X | X | X |
-| smallint | None | X | 1
| X | X | X
| 1 | X | X
| X | X | 1 |
X | X | X |
-| int | None | X | 1
| X | 0 | X
| 1 | X | X
| X | X | 1 |
X | X | X |
-| bigint | None | X | 1
| X | X | 0
| 1 | X | X
| X | X | 1 |
X | X | X |
-| string | None | 'true' | '1'
| 'a' | '1970-01-01' | '1970-01-01 00:00:00'
| '1.0' | "array('i', [1])" | '[1]'
| '(1,)' | "bytearray(b'ABC')" | '1' |
"{'a': 1}" | 'Row(kwargs=1)' | 'Row(namedtuple=1)' |
-| date | None | X |
datetime.date(1970, 1, 2) | X | datetime.date(1970, 1, 1) |
datetime.date(1970, 1, 1) | datetime.date(1970, 1, 2) | X
| X | X | X |
datetime.date(1970, 1, 2) | X | X | X
|
-| timestamp | None | X | X
| X | X |
datetime.datetime(1970, 1, 1, | X | X
| X | X | X | X
| X | X | X
|
-| float | None | 1.0 | 1.0
| X | X | X
| 1.0 | X | X
| X | X | 1.0 |
X | X | X |
-| double | None | 1.0 | 1.0
| X | X | X
| 1.0 | X | X
| X | X | 1.0 |
X | X | X |
-| array<int> | None | X | X
| X | X | X
| X | [1] | [1]
| [1] | [65, 66, 67] | X |
X | [1] | [1] |
-| binary | None | X | X
| X | X | X
| X | X | X
| X | b'ABC' | X |
X | X | X |
-| decimal(10,0) | None | X | X
| X | X | X
| X | X | X
| X | X | Decimal('1') |
X | X | X |
-| map<string,int> | None | X | X
| X | X | X
| X | X | X
| X | X | X |
{'a': 1} | X | X |
-| struct<_1:int> | None | X | X
| X | X | X
| X | X | X
| Row(_1=1) | X | X |
Row(_1=None) | Row(_1=1) | Row(_1=1) |
\ No newline at end of file
+| SQL Type \ Pandas Value(Type) | None@NoneType | True@bool | 1@int
| a@str | 1970-01-01@date | 1970-01-01
00:00:00@datetime | 1.0@float | array('i', [1])@array
| [1]@list | (1,)@tuple | bytearray(b'ABC')@bytearray | 1@Decimal
| {'a': 1}@dict | Row(kwargs=1)@Row | Row(namedtuple=1)@Row |
+|---------------------------------|-----------------|-------------|---------------------------|---------|---------------------------|-------------------------------------|---------------------------|-------------------------|------------|--------------|-------------------------------|---------------------------|-----------------|---------------------|-------------------------|
+| boolean | None | True | True
| X | X | X
| True | X | X |
X | X | X | X
| X | X |
+| tinyint | None | X | 1
| X | X | X
| 1 | X | X |
X | X | 1 | X
| X | X |
+| smallint | None | X | 1
| X | X | X
| 1 | X | X |
X | X | 1 | X
| X | X |
+| int | None | X | 1
| X | 0 | X
| 1 | X | X |
X | X | 1 | X
| X | X |
+| bigint | None | X | 1
| X | X | 0
| 1 | X | X |
X | X | 1 | X
| X | X |
+| string | None | 'true' | '1'
| 'a' | '1970-01-01' | '1970-01-01 00:00:00'
| '1.0' | "array('i', [1])" | '[1]' |
'(1,)' | "bytearray(b'ABC')" | '1' |
"{'a': 1}" | 'Row(kwargs=1)' | 'Row(namedtuple=1)' |
+| date | None | X |
datetime.date(1970, 1, 2) | X | datetime.date(1970, 1, 1) |
datetime.date(1970, 1, 1) | datetime.date(1970, 1, 2) | X
| X | X | X |
datetime.date(1970, 1, 2) | X | X | X
|
+| timestamp | None | X | X
| X | X | datetime.datetime(1970,
1, 1, 0, 0) | X | X | X
| X | X | X | X
| X | X |
+| float | None | 1.0 | 1.0
| X | X | X
| 1.0 | X | X |
X | X | 1.0 | X
| X | X |
+| double | None | 1.0 | 1.0
| X | X | X
| 1.0 | X | X |
X | X | 1.0 | X
| X | X |
+| array<int> | None | X | X
| X | X | X
| X | [1] | [1] |
[1] | [65, 66, 67] | X | X
| [1] | [1] |
+| binary | None | X | X
| X | X | X
| X | X | X |
X | b'ABC' | X | X
| X | X |
+| decimal(10,0) | None | X | X
| X | X | X
| X | X | X |
X | X | Decimal('1') | X
| X | X |
+| map<string,int> | None | X | X
| X | X | X
| X | X | X |
X | X | X |
{'a': 1} | X | X |
+| struct<_1:int> | None | X | X
| X | X | X
| X | X | X |
Row(_1=1) | X | X |
Row(_1=None) | Row(_1=1) | Row(_1=1) |
\ No newline at end of file
diff --git
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow_and_pandas.csv
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow_and_pandas.csv
index e829bcf6ca29..9fb88155d3f5 100644
---
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow_and_pandas.csv
+++
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow_and_pandas.csv
@@ -1,4 +1,4 @@
-SQL Type \ Pandas Value(Type) None(NoneType) True(bool) 1(int) a(str)
1970-01-01(date) 1970-01-01 00:00:00(datetime) 1.0(float)
array('i', [1])(array) [1](list) (1,)(tuple)
bytearray(b'ABC')(bytearray) 1(Decimal) {'a': 1}(dict)
Row(kwargs=1)(Row) Row(namedtuple=1)(Row)
+SQL Type \ Pandas Value(Type) None@NoneType True@bool 1@int a@str
1970-01-01@date 1970-01-01 00:00:00@datetime 1.0@float array('i',
[1])@array [1]@list (1,)@tuple bytearray(b'ABC')@bytearray
1@Decimal {'a': 1}@dict Row(kwargs=1)@Row Row(namedtuple=1)@Row
boolean None True True X X X True X
X X X X X X X
tinyint None 1 1 X X X 1 X
X X X 1 X X X
smallint None 1 1 X X X 1 X
X X X 1 X X X
@@ -6,7 +6,7 @@ int None 1 1 X 0 X 1
X X X X 1 X X X
bigint None 1 1 X X 0 1 X X
X X 1 X X X
string None 'True' '1' 'a' '1970-01-01' '1970-01-01 00:00:00'
'1.0' """array('i', [1])""" '[1]' '(1,)' """bytearray(b'ABC')""" '1'
"""{'a': 1}""" 'Row(kwargs=1)' 'Row(namedtuple=1)'
date None X X X datetime.date(1970, 1, 1)
datetime.date(1970, 1, 1) X X X X X
datetime.date(1970, 1, 2) X X X
-timestamp None X datetime.datetime(1969, 12, 31 X X
datetime.datetime(1970, 1, 1, X X X X X
datetime.datetime(1969, 12, 31 X X X
+timestamp None X datetime.datetime(1969, 12, 31, 16, 0, 0
X X datetime.datetime(1970, 1, 1, 0, 0) X X X
X X datetime.datetime(1969, 12, 31, 16, 0, 0 X X
X
float None 1.0 1.0 X X X 1.0 X X
X X 1.0 X X X
double None 1.0 1.0 X X X 1.0 X X
X X 1.0 X X X
array<int> None X X X X X X [1]
[1] [1] [65, 66, 67] X X [1] [1]
diff --git
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow_and_pandas.md
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow_and_pandas.md
index 7222f6b9363c..504124579bbf 100644
---
a/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow_and_pandas.md
+++
b/python/pyspark/sql/tests/coercion/golden_python_udf_return_type_coercion_with_arrow_and_pandas.md
@@ -1,17 +1,17 @@
-| SQL Type \ Pandas Value(Type) | None(NoneType) | True(bool) | 1(int)
| a(str) | 1970-01-01(date) | 1970-01-01
00:00:00(datetime) | 1.0(float) | array('i', [1])(array) | [1](list) |
(1,)(tuple) | bytearray(b'ABC')(bytearray) | 1(Decimal)
| {'a': 1}(dict) | Row(kwargs=1)(Row) | Row(namedtuple=1)(Row) |
-|---------------------------------|------------------|--------------|--------------------------------|----------|---------------------------|---------------------------------|--------------|--------------------------|-------------|---------------|--------------------------------|--------------------------------|------------------|----------------------|--------------------------|
-| boolean | None | True | True
| X | X | X
| True | X | X | X
| X | X | X
| X | X |
-| tinyint | None | 1 | 1
| X | X | X
| 1 | X | X | X
| X | 1 | X
| X | X |
-| smallint | None | 1 | 1
| X | X | X
| 1 | X | X | X
| X | 1 | X
| X | X |
-| int | None | 1 | 1
| X | 0 | X
| 1 | X | X | X
| X | 1 | X
| X | X |
-| bigint | None | 1 | 1
| X | X | 0
| 1 | X | X | X
| X | 1 | X
| X | X |
-| string | None | 'True' | '1'
| 'a' | '1970-01-01' | '1970-01-01
00:00:00' | '1.0' | "array('i', [1])" | '[1]' |
'(1,)' | "bytearray(b'ABC')" | '1'
| "{'a': 1}" | 'Row(kwargs=1)' | 'Row(namedtuple=1)' |
-| date | None | X | X
| X | datetime.date(1970, 1, 1) |
datetime.date(1970, 1, 1) | X | X | X
| X | X | datetime.date(1970,
1, 2) | X | X | X
|
-| timestamp | None | X |
datetime.datetime(1969, 12, 31 | X | X |
datetime.datetime(1970, 1, 1, | X | X | X
| X | X |
datetime.datetime(1969, 12, 31 | X | X | X
|
-| float | None | 1.0 | 1.0
| X | X | X
| 1.0 | X | X | X
| X | 1.0 | X
| X | X |
-| double | None | 1.0 | 1.0
| X | X | X
| 1.0 | X | X | X
| X | 1.0 | X
| X | X |
-| array<int> | None | X | X
| X | X | X
| X | [1] | [1] | [1]
| [65, 66, 67] | X | X
| [1] | [1] |
-| binary | None | b'\x00' | b'\x00'
| X | X | X
| X | b'\x01\x00\x00\x00' | b'\x01' |
b'\x01' | b'ABC' | X
| X | b'\x01' | b'\x01' |
-| decimal(10,0) | None | X | X
| X | X | X
| Decimal('1') | X | X | X
| X | Decimal('1') | X
| X | X |
-| map<string,int> | None | X | X
| X | X | X
| X | X | X | X
| X | X |
{'a': 1} | X | X |
-| struct<_1:int> | None | X | X
| X | X | X
| X | Row(_1=1) | Row(_1=1) |
Row(_1=1) | Row(_1=65) | X
| Row(_1=None) | Row(_1=1) | Row(_1=1) |
\ No newline at end of file
+| SQL Type \ Pandas Value(Type) | None@NoneType | True@bool | 1@int
| a@str | 1970-01-01@date |
1970-01-01 00:00:00@datetime | 1.0@float | array('i', [1])@array |
[1]@list | (1,)@tuple | bytearray(b'ABC')@bytearray | 1@Decimal
| {'a': 1}@dict | Row(kwargs=1)@Row |
Row(namedtuple=1)@Row |
+|---------------------------------|-----------------|-------------|------------------------------------------|---------|---------------------------|-------------------------------------|--------------|-------------------------|------------|--------------|-------------------------------|------------------------------------------|-----------------|---------------------|-------------------------|
+| boolean | None | True | True
| X | X | X
| True | X | X
| X | X | X
| X | X | X |
+| tinyint | None | 1 | 1
| X | X | X
| 1 | X | X
| X | X | 1
| X | X | X |
+| smallint | None | 1 | 1
| X | X | X
| 1 | X | X
| X | X | 1
| X | X | X |
+| int | None | 1 | 1
| X | 0 | X
| 1 | X | X
| X | X | 1
| X | X | X |
+| bigint | None | 1 | 1
| X | X | 0
| 1 | X | X
| X | X | 1
| X | X | X |
+| string | None | 'True' | '1'
| 'a' | '1970-01-01' |
'1970-01-01 00:00:00' | '1.0' | "array('i', [1])" |
'[1]' | '(1,)' | "bytearray(b'ABC')" | '1'
| "{'a': 1}" | 'Row(kwargs=1)' |
'Row(namedtuple=1)' |
+| date | None | X | X
| X | datetime.date(1970, 1, 1) |
datetime.date(1970, 1, 1) | X | X |
X | X | X | datetime.date(1970,
1, 2) | X | X | X
|
+| timestamp | None | X |
datetime.datetime(1969, 12, 31, 16, 0, 0 | X | X
| datetime.datetime(1970, 1, 1, 0, 0) | X | X
| X | X | X |
datetime.datetime(1969, 12, 31, 16, 0, 0 | X | X
| X |
+| float | None | 1.0 | 1.0
| X | X | X
| 1.0 | X | X
| X | X | 1.0
| X | X | X |
+| double | None | 1.0 | 1.0
| X | X | X
| 1.0 | X | X
| X | X | 1.0
| X | X | X |
+| array<int> | None | X | X
| X | X | X
| X | [1] | [1]
| [1] | [65, 66, 67] | X
| X | [1] | [1] |
+| binary | None | b'\x00' | b'\x00'
| X | X | X
| X | b'\x01\x00\x00\x00' | b'\x01'
| b'\x01' | b'ABC' | X
| X | b'\x01' | b'\x01' |
+| decimal(10,0) | None | X | X
| X | X | X
| Decimal('1') | X | X
| X | X | Decimal('1')
| X | X | X |
+| map<string,int> | None | X | X
| X | X | X
| X | X | X
| X | X | X
| {'a': 1} | X | X |
+| struct<_1:int> | None | X | X
| X | X | X
| X | Row(_1=1) | Row(_1=1)
| Row(_1=1) | Row(_1=65) | X
| Row(_1=None) | Row(_1=1) | Row(_1=1) |
\ No newline at end of file
diff --git a/python/pyspark/sql/tests/coercion/test_pandas_udf_input_type.py
b/python/pyspark/sql/tests/coercion/test_pandas_udf_input_type.py
index 005a8f62945a..cd3880e6c9dd 100644
--- a/python/pyspark/sql/tests/coercion/test_pandas_udf_input_type.py
+++ b/python/pyspark/sql/tests/coercion/test_pandas_udf_input_type.py
@@ -312,13 +312,14 @@ class PandasUDFInputTypeTests(ReusedSQLTestCase):
types = [row["python_type"] for row in results_data]
result.append(str(types))
- result.append(str(values).replace("\n", " "))
+ result.append(str(values))
except Exception as e:
print("error_msg", e)
- # Clean up exception message to remove newlines and extra
whitespace
- e = str(e).replace("\n", " ").replace("\r", " ").replace("\t",
" ")
- result.append(f"✗ {e}")
+ result.append(f"✗ {str(e)}")
+
+ # Clean up exception message to remove newlines and extra
whitespace
+ result = [r.replace("\n", " ").replace("\r", " ").replace("\t", "
") for r in result]
error_msg = None
if testing and result != list(golden.iloc[idx]):
diff --git a/python/pyspark/sql/tests/coercion/test_pandas_udf_return_type.py
b/python/pyspark/sql/tests/coercion/test_pandas_udf_return_type.py
index 9982717d6e07..f0c81adb1010 100644
--- a/python/pyspark/sql/tests/coercion/test_pandas_udf_return_type.py
+++ b/python/pyspark/sql/tests/coercion/test_pandas_udf_return_type.py
@@ -226,12 +226,13 @@ class PandasUDFReturnTypeTests(ReusedSQLTestCase):
.collect()
)
result = repr([row[0] for row in rows])
- # "\t" is used as the delimiter
- result = result.replace("\t", "")
result = result[:40]
except Exception:
result = "X"
+ # Clean up exception message to remove newlines and extra
whitespace
+ result = result.replace("\n", " ").replace("\r", "
").replace("\t", " ")
+
err = None
if testing:
expected = golden.loc[str_t, str_v]
diff --git a/python/pyspark/sql/tests/coercion/test_python_udf_return_type.py
b/python/pyspark/sql/tests/coercion/test_python_udf_return_type.py
index c6ccf3eee80a..08ba2f809f50 100644
--- a/python/pyspark/sql/tests/coercion/test_python_udf_return_type.py
+++ b/python/pyspark/sql/tests/coercion/test_python_udf_return_type.py
@@ -148,7 +148,7 @@ class UDFReturnTypeTests(ReusedSQLTestCase):
return spark_type.simpleString()
def repr_value(self, value):
- return f"{str(value)}({type(value).__name__})"
+ return f"{str(value)}@{type(value).__name__}"
def test_str_repr(self):
self.assertEqual(
@@ -223,12 +223,13 @@ class UDFReturnTypeTests(ReusedSQLTestCase):
result = repr(row[0])
# Normalize Java object hash codes to make tests deterministic
result = re.sub(r"@[a-fA-F0-9]+", "@<hash>", result)
- # "\t" is used as the delimiter
- result = result.replace("\t", "")
- result = result[:30]
+ result = result[:40]
except Exception:
result = "X"
+ # Clean up exception message to remove newlines and extra
whitespace
+ result = result.replace("\n", " ").replace("\r", "
").replace("\t", " ")
+
err = None
if testing:
expected = golden.loc[str_t, str_v]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
