[
https://issues.apache.org/jira/browse/SPARK-34703?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
L. C. Hsieh updated SPARK-34703:
--------------------------------
Description:
Three PySpark tests are currently failing in the Jenkins 2.4 build:
test_column_order, test_complex_groupby, test_udf_with_key.
{code}
======================================================================
ERROR: test_column_order (pyspark.sql.tests.GroupedMapPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/spark/python/pyspark/sql/tests.py", line 5996, in test_column_order
expected = pd_result.sort_values(['id', 'v']).reset_index(drop=True)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
4711, in sort_values
for x in by]
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1702, in _get_label_or_level_values
self._check_label_or_level_ambiguity(key, axis=axis)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1656, in _check_label_or_level_ambiguity
raise ValueError(msg)
ValueError: 'id' is both an index level and a column label, which is ambiguous.
======================================================================
ERROR: test_complex_groupby (pyspark.sql.tests.GroupedMapPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/spark/python/pyspark/sql/tests.py", line 5765, in test_complex_groupby
expected = expected.sort_values(['id', 'v']).reset_index(drop=True)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
4711, in sort_values
for x in by]
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1702, in _get_label_or_level_values
self._check_label_or_level_ambiguity(key, axis=axis)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1656, in _check_label_or_level_ambiguity
raise ValueError(msg)
ValueError: 'id' is both an index level and a column label, which is ambiguous.
======================================================================
ERROR: test_udf_with_key (pyspark.sql.tests.GroupedMapPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/spark/python/pyspark/sql/tests.py", line 5922, in test_udf_with_key
.sort_values(['id', 'v']).reset_index(drop=True)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
4711, in sort_values
for x in by]
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1702, in _get_label_or_level_values
self._check_label_or_level_ambiguity(key, axis=axis)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1656, in _check_label_or_level_ambiguity
raise ValueError(msg)
ValueError: 'id' is both an index level and a column label, which is ambiguous.
{code}
was:
Three PySpark tests are currently failed in Jenkins: test_column_order,
test_complex_groupby, test_udf_with_key.
{code}
======================================================================
ERROR: test_column_order (pyspark.sql.tests.GroupedMapPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/spark/python/pyspark/sql/tests.py", line 5996, in test_column_order
expected = pd_result.sort_values(['id', 'v']).reset_index(drop=True)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
4711, in sort_values
for x in by]
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1702, in _get_label_or_level_values
self._check_label_or_level_ambiguity(key, axis=axis)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1656, in _check_label_or_level_ambiguity
raise ValueError(msg)
ValueError: 'id' is both an index level and a column label, which is ambiguous.
======================================================================
ERROR: test_complex_groupby (pyspark.sql.tests.GroupedMapPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/spark/python/pyspark/sql/tests.py", line 5765, in test_complex_groupby
expected = expected.sort_values(['id', 'v']).reset_index(drop=True)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
4711, in sort_values
for x in by]
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1702, in _get_label_or_level_values
self._check_label_or_level_ambiguity(key, axis=axis)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1656, in _check_label_or_level_ambiguity
raise ValueError(msg)
ValueError: 'id' is both an index level and a column label, which is ambiguous.
======================================================================
ERROR: test_udf_with_key (pyspark.sql.tests.GroupedMapPandasUDFTests)
----------------------------------------------------------------------
Traceback (most recent call last):
File "/spark/python/pyspark/sql/tests.py", line 5922, in test_udf_with_key
.sort_values(['id', 'v']).reset_index(drop=True)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
4711, in sort_values
for x in by]
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1702, in _get_label_or_level_values
self._check_label_or_level_ambiguity(key, axis=axis)
File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
1656, in _check_label_or_level_ambiguity
raise ValueError(msg)
ValueError: 'id' is both an index level and a column label, which is ambiguous.
{code}
> Fix pyspark test when using sort_values on Pandas
> -------------------------------------------------
>
> Key: SPARK-34703
> URL: https://issues.apache.org/jira/browse/SPARK-34703
> Project: Spark
> Issue Type: Bug
> Components: PySpark
> Affects Versions: 2.4.7
> Reporter: L. C. Hsieh
> Assignee: L. C. Hsieh
> Priority: Major
>
> Three PySpark tests are currently failing in the Jenkins 2.4 build:
> test_column_order, test_complex_groupby, test_udf_with_key.
> {code}
> ======================================================================
> ERROR: test_column_order (pyspark.sql.tests.GroupedMapPandasUDFTests)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File "/spark/python/pyspark/sql/tests.py", line 5996, in test_column_order
> expected = pd_result.sort_values(['id', 'v']).reset_index(drop=True)
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
> 4711, in sort_values
> for x in by]
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
> 1702, in _get_label_or_level_values
> self._check_label_or_level_ambiguity(key, axis=axis)
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
> 1656, in _check_label_or_level_ambiguity
> raise ValueError(msg)
> ValueError: 'id' is both an index level and a column label, which is ambiguous.
> ======================================================================
> ERROR: test_complex_groupby (pyspark.sql.tests.GroupedMapPandasUDFTests)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File "/spark/python/pyspark/sql/tests.py", line 5765, in test_complex_groupby
> expected = expected.sort_values(['id', 'v']).reset_index(drop=True)
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
> 4711, in sort_values
> for x in by]
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
> 1702, in _get_label_or_level_values
> self._check_label_or_level_ambiguity(key, axis=axis)
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
> 1656, in _check_label_or_level_ambiguity
> raise ValueError(msg)
> ValueError: 'id' is both an index level and a column label, which is ambiguous.
> ======================================================================
> ERROR: test_udf_with_key (pyspark.sql.tests.GroupedMapPandasUDFTests)
> ----------------------------------------------------------------------
> Traceback (most recent call last):
> File "/spark/python/pyspark/sql/tests.py", line 5922, in test_udf_with_key
> .sort_values(['id', 'v']).reset_index(drop=True)
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/frame.py", line
> 4711, in sort_values
> for x in by]
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
> 1702, in _get_label_or_level_values
> self._check_label_or_level_ambiguity(key, axis=axis)
> File "/usr/local/lib/python2.7/dist-packages/pandas/core/generic.py", line
> 1656, in _check_label_or_level_ambiguity
> raise ValueError(msg)
> ValueError: 'id' is both an index level and a column label, which is ambiguous.
> {code}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
