Yikun commented on pull request #34717: URL: https://github.com/apache/spark/pull/34717#issuecomment-981463864
Completed all `pyspark-pandas-slow` tests with: ``` python/run-tests --modules=pyspark-pandas --parallelism=2 --python-executable=python3 ``` Several test cases failed with pandas 1.0.1 due to `AttributeError: type object 'object' has no attribute 'dtype'` and **passed with pandas v1.0.5**. <details> <summary>Test failure details</summary> ``` ====================================================================== ERROR: test_astype (pyspark.pandas.tests.data_type_ops.test_categorical_ops.CategoricalOpsTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/data_type_ops/test_categorical_ops.py", line 204, in test_astype self.assert_eq(pser.astype(int), psser.astype(int)) File "/Users/jiangyikun/spark/spark/python/pyspark/testing/pandasutils.py", line 224, in assert_eq robj = self._to_pandas(right) File "/Users/jiangyikun/spark/spark/python/pyspark/testing/pandasutils.py", line 245, in _to_pandas return obj.to_pandas() File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/series.py", line 1588, in to_pandas return self._to_pandas() File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/series.py", line 1594, in _to_pandas return self._to_internal_pandas().copy() File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/series.py", line 6349, in _to_internal_pandas return self._psdf._internal.to_pandas_frame[self.name] File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/utils.py", line 584, in wrapped_lazy_property setattr(self, attr_name, fn(self)) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/internal.py", line 1049, in to_pandas_frame pdf = sdf.toPandas() File "/Users/jiangyikun/spark/spark/python/pyspark/sql/pandas/conversion.py", line 185, in toPandas pdf = pd.DataFrame(columns=tmp_column_names).astype( File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 435, in __init__ mgr = init_dict(data, 
index, columns, dtype=dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py", line 239, in init_dict val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/dtypes/cast.py", line 1449, in construct_1d_arraylike_from_scalar dtype = dtype.dtype AttributeError: type object 'object' has no attribute 'dtype' ====================================================================== ERROR: test_read_csv (pyspark.pandas.tests.test_csv.CsvTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/test_csv.py", line 151, in test_read_csv check(usecols=[]) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/test_csv.py", line 138, in check self.assert_eq(expected, actual, almost=True) File "/Users/jiangyikun/spark/spark/python/pyspark/testing/pandasutils.py", line 224, in assert_eq robj = self._to_pandas(right) File "/Users/jiangyikun/spark/spark/python/pyspark/testing/pandasutils.py", line 245, in _to_pandas return obj.to_pandas() File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/frame.py", line 4856, in to_pandas return self._to_pandas() File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/frame.py", line 4862, in _to_pandas return self._internal.to_pandas_frame.copy() File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/utils.py", line 584, in wrapped_lazy_property setattr(self, attr_name, fn(self)) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/internal.py", line 1049, in to_pandas_frame pdf = sdf.toPandas() File "/Users/jiangyikun/spark/spark/python/pyspark/sql/pandas/conversion.py", line 185, in toPandas pdf = pd.DataFrame(columns=tmp_column_names).astype( File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 435, in __init__ mgr = init_dict(data, index, 
columns, dtype=dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py", line 239, in init_dict val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/dtypes/cast.py", line 1449, in construct_1d_arraylike_from_scalar dtype = dtype.dtype AttributeError: type object 'object' has no attribute 'dtype' ====================================================================== ERROR: test_kde_plot (pyspark.pandas.tests.plot.test_frame_plot_plotly.DataFramePlotPlotlyTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/plot/test_frame_plot_plotly.py", line 262, in test_kde_plot actual = psdf.plot.kde(bw_method=5, ind=3) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/core.py", line 946, in kde return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/core.py", line 498, in __call__ return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/plotly.py", line 44, in plot_pandas_on_spark return plot_kde(data, **kwargs) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/plotly.py", line 202, in plot_kde pd.DataFrame( File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 435, in __init__ mgr = init_dict(data, index, columns, dtype=dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py", line 254, in init_dict return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py", line 69, in arrays_to_mgr arrays = _homogenize(arrays, index, dtype) File 
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py", line 322, in _homogenize val = sanitize_array( File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/construction.py", line 465, in sanitize_array subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/dtypes/cast.py", line 1461, in construct_1d_arraylike_from_scalar subarr = np.empty(length, dtype=dtype) TypeError: Cannot interpret '<attribute 'dtype' of 'numpy.generic' objects>' as a data type ====================================================================== ERROR: test_kde_plot (pyspark.pandas.tests.plot.test_series_plot_plotly.SeriesPlotPlotlyTest) ---------------------------------------------------------------------- Traceback (most recent call last): File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/tests/plot/test_series_plot_plotly.py", line 231, in test_kde_plot actual = psdf.a.plot.kde(bw_method=5, ind=3) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/core.py", line 946, in kde return self(kind="kde", bw_method=bw_method, ind=ind, **kwargs) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/core.py", line 498, in __call__ return plot_backend.plot_pandas_on_spark(plot_data, kind=kind, **kwargs) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/plotly.py", line 44, in plot_pandas_on_spark return plot_kde(data, **kwargs) File "/Users/jiangyikun/spark/spark/python/pyspark/pandas/plot/plotly.py", line 202, in plot_kde pd.DataFrame( File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/frame.py", line 435, in __init__ mgr = init_dict(data, index, columns, dtype=dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py", line 254, in init_dict return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype) File 
"/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py", line 69, in arrays_to_mgr arrays = _homogenize(arrays, index, dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/internals/construction.py", line 322, in _homogenize val = sanitize_array( File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/construction.py", line 465, in sanitize_array subarr = construct_1d_arraylike_from_scalar(value, len(index), dtype) File "/Users/jiangyikun/venv/lib/python3.8/site-packages/pandas/core/dtypes/cast.py", line 1461, in construct_1d_arraylike_from_scalar subarr = np.empty(length, dtype=dtype) TypeError: Cannot interpret '<attribute 'dtype' of 'numpy.generic' objects>' as a data type ``` </details> At this time, I prefer to update to **1.0.5**, I'm going to run `pyspark-pandas-slow` now. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org