Yikun commented on pull request #34717:
URL: https://github.com/apache/spark/pull/34717#issuecomment-981463864
Completed all `pyspark-pandas-slow` tests with:
```
python/run-tests --modules=pyspark-pandas --parallelism=2
--python-executable=python3
```
Several test cases failed with pandas 1.0.1 due to `AttributeError: type object
'object' has no attribute 'dtype'` and **passed with pandas v1.0.5**.
<details>
<summary>Test failure details</summary>
```
======================================================================
ERROR: test_astype
(pyspark.pandas.tests.data_type_ops.test_categorical_ops.CategoricalOpsTest)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py",
line 204, in test_astype
self.assert_eq(pser.astype(int), psser.astype(int))
File
"/Users/jiangyikun/spark/spark/python/pyspark/testing/pandasutils.py", line
224, in assert_eq
robj = self._to_pandas(right)
File
"/Users/jiangyikun/spark/spark/python/pyspark/testing/pandasutils.py", line
245, in _to_pandas
return obj.to_pandas()
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/series.py", line
1588, in to_pandas
return self._to_pandas()
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/series.py", line
1594, in _to_pandas
return self._to_internal_pandas().copy()
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/series.py", line
6349, in _to_internal_pandas
return self._psdf._internal.to_pandas_frame[self.name]
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/utils.py", line
584, in wrapped_lazy_property
setattr(self, attr_name, fn(self))
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/internal.py",
line 1049, in to_pandas_frame
pdf = sdf.toPandas()
File
"/Users/jiangyikun/spark/spark/python/pyspark/sql/pandas/conversion.py", line
185, in toPandas
pdf = pd.DataFrame(columns=tmp_column_names).astype(
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/frame.py", line
435, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py",
line 239, in init_dict
val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/dtypes/cast.py",
line 1449, in construct_1d_arraylike_from_scalar
dtype = dtype.dtype
AttributeError: type object 'object' has no attribute 'dtype'
======================================================================
ERROR: test_read_csv (pyspark.pandas.tests.test_csv.CsvTest)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/test_csv.py", line
151, in test_read_csv
check(usecols=[])
File
"/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/test_csv.py", line
138, in check
self.assert_eq(expected, actual, almost=True)
File
"/Users/jiangyikun/spark/spark/python/pyspark/testing/pandasutils.py", line
224, in assert_eq
robj = self._to_pandas(right)
File
"/Users/jiangyikun/spark/spark/python/pyspark/testing/pandasutils.py", line
245, in _to_pandas
return obj.to_pandas()
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/frame.py", line
4856, in to_pandas
return self._to_pandas()
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/frame.py", line
4862, in _to_pandas
return self._internal.to_pandas_frame.copy()
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/utils.py", line
584, in wrapped_lazy_property
setattr(self, attr_name, fn(self))
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/internal.py",
line 1049, in to_pandas_frame
pdf = sdf.toPandas()
File
"/Users/jiangyikun/spark/spark/python/pyspark/sql/pandas/conversion.py", line
185, in toPandas
pdf = pd.DataFrame(columns=tmp_column_names).astype(
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/frame.py", line
435, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py",
line 239, in init_dict
val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/dtypes/cast.py",
line 1449, in construct_1d_arraylike_from_scalar
dtype = dtype.dtype
AttributeError: type object 'object' has no attribute 'dtype'
======================================================================
ERROR: test_kde_plot
(pyspark.pandas.tests.plot.test_frame_plot_plotly.DataFramePlotPlotlyTest)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py",
line 262, in test_kde_plot
actual = psdf.plot.kde(bw_method=5, ind=3)
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/core.py",
line 946, in kde
return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/core.py",
line 498, in __call__
return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs)
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/plotly.py",
line 44, in plot_pandas_on_spark
return plot_kde(data, **kwargs)
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/plotly.py",
line 202, in plot_kde
pd.DataFrame(
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/frame.py", line
435, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py",
line 254, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py",
line 69, in arrays_to_mgr
arrays = _homogenize(arrays, index, dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py",
line 322, in _homogenize
val = sanitize_array(
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/construction.py",
line 465, in sanitize_array
subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/dtypes/cast.py",
line 1461, in construct_1d_arraylike_from_scalar
subarr = np.empty(length, dtype=dtype)
TypeError: Cannot interpret '<attribute 'dtype' of 'numpy.generic' objects>'
as a data type
======================================================================
ERROR: test_kde_plot
(pyspark.pandas.tests.plot.test_series_plot_plotly.SeriesPlotPlotlyTest)
----------------------------------------------------------------------
Traceback (most recent call last):
File
"/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py",
line 231, in test_kde_plot
actual = psdf.a.plot.kde(bw_method=5, ind=3)
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/core.py",
line 946, in kde
return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs)
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/core.py",
line 498, in __call__
return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs)
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/plotly.py",
line 44, in plot_pandas_on_spark
return plot_kde(data, **kwargs)
File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/plotly.py",
line 202, in plot_kde
pd.DataFrame(
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/frame.py", line
435, in __init__
mgr = init_dict(data, index, columns, dtype=dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py",
line 254, in init_dict
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py",
line 69, in arrays_to_mgr
arrays = _homogenize(arrays, index, dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py",
line 322, in _homogenize
val = sanitize_array(
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/construction.py",
line 465, in sanitize_array
subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype)
File
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/dtypes/cast.py",
line 1461, in construct_1d_arraylike_from_scalar
subarr = np.empty(length, dtype=dtype)
TypeError: Cannot interpret '<attribute 'dtype' of 'numpy.generic' objects>'
as a data type
```
</details>
At this point, I prefer to update to pandas **1.0.5**; I'm going to run
`pyspark-pandas-slow` now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]