[ https://issues.apache.org/jira/browse/SPARK-48085?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Hyukjin Kwon updated SPARK-48085:
---------------------------------
Description:
{code}
======================================================================
FAIL [0.169s]: test_checking_csv_header
(pyspark.sql.tests.connect.test_parity_datasources.DataSourcesParityTests.test_checking_csv_header)
----------------------------------------------------------------------
pyspark.errors.exceptions.connect.SparkConnectGrpcException:
(org.apache.spark.SparkException) [FAILED_READ_FILE.NO_HINT] Encountered error
while reading file
file:///home/runner/work/spark/spark-3.5/python/target/38acabf5-710b-4c21-b359-f61619e2adc7/tmpm7qyq23g/part-00000-d6c8793b-772d-44e7-bcca-6eeae9cc0ec7-c000.csv.
SQLSTATE: KD001
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/test_datasources.py",
line 167, in test_checking_csv_header
self.assertRaisesRegex(
AssertionError: "CSV header does not conform to the schema" does not match
"(org.apache.spark.SparkException) [FAILED_READ_FILE.NO_HINT] Encountered error
while reading file
file:///home/runner/work/spark/spark-3.5/python/target/38acabf5-710b-4c21-b359-f61619e2adc7/tmpm7qyq23g/part-00000-d6c8793b-772d-44e7-bcca-6eeae9cc0ec7-c000.csv.
SQLSTATE: KD001"
{code}
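The assertion in test_checking_csv_header expects the pre-4.0 message "CSV header does not conform to the schema", but the 4.0 server wraps the failure as FAILED_READ_FILE.NO_HINT, so the regex never matches. A minimal sketch of the header check the test exercises (not the test itself), assuming a Spark Connect session {{spark}} and a hypothetical temp path:

{code:python}
# Hypothetical sketch: write a CSV whose header does not match the declared
# read schema, then read it back with header validation enabled.
from pyspark.sql.types import IntegerType, StructField, StructType

path = "/tmp/csv_header_check"  # hypothetical path
spark.createDataFrame([(1, 2)], ["a", "b"]).write.mode("overwrite").csv(path, header=True)

schema = StructType([StructField("x", IntegerType()), StructField("y", IntegerType())])
# enforceSchema=False makes Spark validate the file header against the schema.
# A 3.5 server reports "CSV header does not conform to the schema"; the 4.0
# server surfaces the same problem as FAILED_READ_FILE.NO_HINT.
spark.read.schema(schema) \
    .option("header", True) \
    .option("enforceSchema", False) \
    .csv(path).collect()
{code}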
{code}
======================================================================
ERROR [0.059s]: test_large_variable_types
(pyspark.sql.tests.connect.test_parity_pandas_map.MapInPandasParityTests.test_large_variable_types)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/pandas/test_pandas_map.py",
line 115, in test_large_variable_types
actual = df.mapInPandas(func, "str string, bin binary").collect()
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/dataframe.py",
line 1645, in collect
table, schema = self._session.client.to_table(query)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 858, in to_table
table, schema, _, _, _ = self._execute_and_fetch(req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 1283, in _execute_and_fetch
for response in self._execute_and_fetch_as_iterator(req):
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 1264, in _execute_and_fetch_as_iterator
self._handle_error(error)
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 1503, in _handle_error
self._handle_rpc_error(error)
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 1539, in _handle_rpc_error
raise convert_exception(info, status.message) from None
pyspark.errors.exceptions.connect.IllegalArgumentException:
[INVALID_PARAMETER_VALUE.CHARSET] The value of parameter(s) `charset` in
`encode` is invalid: expects one of the charsets 'US-ASCII', 'ISO-8859-1',
'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16', but got utf8. SQLSTATE:
22023
{code}
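Per the error, the query reaching the 4.0 server passes the charset alias {{utf8}} to {{encode}}, which the stricter 4.0 validation rejects in favor of the canonical Java charset names. A minimal sketch of the behavior difference (not the mapInPandas test itself), assuming an active session {{spark}}:

{code:python}
from pyspark.sql import functions as sf

df = spark.createDataFrame([("hello",)], ["str"])

# Rejected by a 4.0 server with INVALID_PARAMETER_VALUE.CHARSET: 'utf8' is not
# among 'US-ASCII', 'ISO-8859-1', 'UTF-8', 'UTF-16BE', 'UTF-16LE', 'UTF-16'.
# df.select(sf.encode(df.str, "utf8")).collect()

# The canonical name works against both server versions:
df.select(sf.encode(df.str, "UTF-8")).collect()
{code}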
{code}
======================================================================
ERROR [0.024s]: test_assert_approx_equal_decimaltype_custom_rtol_pass
(pyspark.sql.tests.connect.test_utils.ConnectUtilsTests.test_assert_approx_equal_decimaltype_custom_rtol_pass)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/tests/test_utils.py",
line 279, in test_assert_approx_equal_decimaltype_custom_rtol_pass
assertDataFrameEqual(df1, df2, rtol=1e-1)
File "/home/runner/work/spark/spark-3.5/python/pyspark/testing/utils.py",
line 595, in assertDataFrameEqual
actual_list = actual.collect()
^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/dataframe.py",
line 1645, in collect
table, schema = self._session.client.to_table(query)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 858, in to_table
table, schema, _, _, _ = self._execute_and_fetch(req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 1283, in _execute_and_fetch
for response in self._execute_and_fetch_as_iterator(req):
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 1264, in _execute_and_fetch_as_iterator
self._handle_error(error)
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 1503, in _handle_error
self._handle_rpc_error(error)
File
"/home/runner/work/spark/spark-3.5/python/pyspark/sql/connect/client/core.py",
line 1539, in _handle_rpc_error
raise convert_exception(info, status.message) from None
pyspark.errors.exceptions.connect.ArithmeticException:
[NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION] 83.14 cannot be represented as
Decimal(4, 3). If necessary set "spark.sql.ansi.enabled" to "false" to bypass
this error, and return NULL instead. SQLSTATE: 22003
----------------------------------------------------------------------
{code}
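The root cause here is the ANSI default itself: on a 4.0 server a decimal cast that overflows the target precision raises instead of silently returning NULL, so the collect() inside assertDataFrameEqual fails. A minimal sketch, assuming a session {{spark}}:

{code:python}
# With ANSI off (the 3.5 default), an overflowing decimal cast yields NULL.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT CAST(83.14 AS DECIMAL(4, 3))").show()  # -> NULL

# With ANSI on (the 4.0 default), the same cast raises
# NUMERIC_VALUE_OUT_OF_RANGE.WITH_SUGGESTION, matching the failure above.
spark.conf.set("spark.sql.ansi.enabled", "true")
spark.sql("SELECT CAST(83.14 AS DECIMAL(4, 3))").show()
{code}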
{code}
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py",
line 1057, in pyspark.sql.connect.dataframe.DataFrame.union
Failed example:
df3.show()
Exception raised:
Traceback (most recent call last):
File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/doctest.py",
line 1355, in __run
exec(compile(example.source, filename, "single",
File "<doctest pyspark.sql.connect.dataframe.DataFrame.union[10]>", line
1, in <module>
df3.show()
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py",
line 996, in show
print(self._show_string(n, truncate, vertical))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py",
line 753, in _show_string
).toPandas()
^^^^^^^^^^
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py",
line 1663, in toPandas
return self._session.client.to_pandas(query)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 873, in to_pandas
table, schema, metrics, observed_metrics, _ = self._execute_and_fetch(
^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 1283, in _execute_and_fetch
for response in self._execute_and_fetch_as_iterator(req):
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 1264, in _execute_and_fetch_as_iterator
self._handle_error(error)
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 1503, in _handle_error
self._handle_rpc_error(error)
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 1539, in _handle_rpc_error
raise convert_exception(info, status.message) from None
pyspark.errors.exceptions.connect.NumberFormatException:
[CAST_INVALID_INPUT] The value 'Alice' of the type "STRING" cannot be cast to
"BIGINT" because it is malformed. Correct the value as per the syntax, or
change its target type. Use `try_cast` to tolerate malformed input and return
NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass
this error. SQLSTATE: 22018
JVM stacktrace:
org.apache.spark.SparkNumberFormatException: [CAST_INVALID_INPUT] The value
'Alice' of the type "STRING" cannot be cast to "BIGINT" because it is
malformed. Correct the value as per the syntax, or change its target type. Use
`try_cast` to tolerate malformed input and return NULL instead. If necessary
set "spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22018
at
org.apache.spark.sql.errors.QueryExecutionErrors$.invalidInputInCastToNumberError(QueryExecutionErrors.scala:145)
at
org.apache.spark.sql.catalyst.util.UTF8StringUtils$.withException(UTF8StringUtils.scala:51)
at
org.apache.spark.sql.catalyst.util.UTF8StringUtils$.toLongExact(UTF8StringUtils.scala:31)
at
org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castToLong$2(Cast.scala:770)
at
org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castToLong$2$adapted(Cast.scala:770)
at
org.apache.spark.sql.catalyst.expressions.Cast.buildCast(Cast.scala:565)
at org.apache.spark.sql.catalyst.expressions.Cast.$anonfun$castToLong...
{code}
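The union doctest fails for the same ANSI reason: {{union}} resolves columns by position, so the example ends up casting the STRING value 'Alice' to BIGINT, which raises CAST_INVALID_INPUT on the 4.0 server instead of producing NULL. The error message itself names both workarounds; a hedged sketch, assuming a session {{spark}}:

{code:python}
spark.conf.set("spark.sql.ansi.enabled", "true")

# Raises CAST_INVALID_INPUT against a 4.0 server:
# spark.sql("SELECT CAST('Alice' AS BIGINT)").collect()

# Workaround 1: tolerate malformed input explicitly.
spark.sql("SELECT TRY_CAST('Alice' AS BIGINT)").show()  # -> NULL

# Workaround 2: restore the pre-ANSI behavior globally.
spark.conf.set("spark.sql.ansi.enabled", "false")
spark.sql("SELECT CAST('Alice' AS BIGINT)").show()  # -> NULL
{code}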
{code}
**********************************************************************
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/functions.py",
line 3546, in pyspark.sql.connect.functions.current_database
Failed example:
spark.range(1).select(current_database()).show()
Expected:
+------------------+
|current_database()|
+------------------+
| default|
+------------------+
Got:
+----------------+
|current_schema()|
+----------------+
| default|
+----------------+
<BLANKLINE>
**********************************************************************
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/functions.py",
line 3547, in pyspark.sql.connect.functions.current_schema
Failed example:
spark.range(1).select(sf.current_schema()).show()
Expected:
+------------------+
|current_database()|
+------------------+
| default|
+------------------+
Got:
+----------------+
|current_schema()|
+----------------+
| default|
+----------------+
<BLANKLINE>
**********************************************************************
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/functions.py",
line 3310, in pyspark.sql.connect.functions.to_unix_timestamp
Failed example:
df.select(to_unix_timestamp(df.e).alias('r')).collect()
Exception raised:
Traceback (most recent call last):
File "/opt/hostedtoolcache/Python/3.11.9/x64/lib/python3.11/doctest.py",
line 1355, in __run
exec(compile(example.source, filename, "single",
File "<doctest pyspark.sql.connect.functions.to_unix_timestamp[6]>", line
1, in <module>
df.select(to_unix_timestamp(df.e).alias('r')).collect()
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/dataframe.py",
line 1645, in collect
table, schema = self._session.client.to_table(query)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 858, in to_table
table, schema, _, _, _ = self._execute_and_fetch(req)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 1283, in _execute_and_fetch
for response in self._execute_and_fetch_as_iterator(req):
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 1264, in _execute_and_fetch_as_iterator
self._handle_error(error)
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 1503, in _handle_error
self._handle_rpc_error(error)
File
"/home/runner/work/spark/spark-35/python/pyspark/sql/connect/client/core.py",
line 1539, in _handle_rpc_error
raise convert_exception(info, status.message) from None
pyspark.errors.exceptions.connect.DateTimeException:
[CANNOT_PARSE_TIMESTAMP] Text '2016-04-08' could not be parsed at index 10. If
necessary set "spark.sql.ansi.enabled" to "false" to bypass this error.
SQLSTATE: 22007
JVM stacktrace:
org.apache.spark.SparkDateTimeException: [CANNOT_PARSE_TIMESTAMP] Text
'2016-04-08' could not be parsed at index 10. If necessary set
"spark.sql.ansi.enabled" to "false" to bypass this error. SQLSTATE: 22007
at
org.apache.spark.sql.errors.QueryExecutionErrors$.ansiDateTimeParseError(QueryExecutionErrors.scala:271)
at
org.apache.spark.sql.catalyst.expressions.ToTimestamp.eval(datetimeExpressions.scala:1300)
at
org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:159)
at
org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(InterpretedMutableProjection.scala:89)
at
org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$48.$anonfun$applyOrElse$82(Optimizer.scala:2208)
at scala.collection.immutable.List.map(List.scala:247)
at scala.collection.immutable.List.map(List.scala:79)
at
org.apache.spark.sql.catalyst.optimizer.ConvertToLocalRelation$$anonfun$apply$48.applyOrElse(Optimizer.scala:2208)
at org.apache.spark.sql.catalyst.optimizer...
**********************************************************************
{code}
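Two distinct causes appear in the doctest failures above. The {{current_database()}}/{{current_schema()}} failures are a naming change: the 4.0 server labels the output column {{current_schema()}} in both cases, so the expected headers no longer match. The {{to_unix_timestamp}} failure is ANSI again: parsing the date-only string '2016-04-08' with the default format 'yyyy-MM-dd HH:mm:ss' raises CANNOT_PARSE_TIMESTAMP instead of returning NULL. A sketch of the timestamp case, assuming a session {{spark}}:

{code:python}
from pyspark.sql import functions as sf

df = spark.createDataFrame([("2016-04-08",)], ["e"])

# Under ANSI, the default format 'yyyy-MM-dd HH:mm:ss' cannot parse a
# date-only string and raises CANNOT_PARSE_TIMESTAMP; with ANSI off the
# expression returns NULL. An explicit format sidesteps the version skew:
df.select(sf.to_unix_timestamp(df.e, sf.lit("yyyy-MM-dd")).alias("r")).collect()
{code}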
> ANSI enabled by default brings different results in the tests in 3.5 client <> 4.0 server
> -----------------------------------------------------------------------------------------
>
> Key: SPARK-48085
> URL: https://issues.apache.org/jira/browse/SPARK-48085
> Project: Spark
> Issue Type: Sub-task
> Components: Connect, PySpark, SQL
> Affects Versions: 4.0.0
> Reporter: Hyukjin Kwon
> Priority: Major