anthonycroft commented on PR #47175:
URL: https://github.com/apache/spark/pull/47175#issuecomment-2480556404
How do we get around this in JupyterLab? By default, as of the time of writing, it ships
numpy==2.0.2 and pyspark==3.5.3.
JupyterLab runs inside a Docker container internally (I believe), so there is no way
to downgrade the packages.
Full Traceback
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[8], line 3
1 import pandas as pd
2 import numpy as np
----> 3 import pyspark.pandas as ps
4 from pyspark.sql import SparkSession
6 os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
File
\\?\[C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\__init__.py:60](file:///C:/Users/tonyj/AppData/Roaming/jupyterlab-desktop/envs/env_1/Lib/site-packages/pyspark/pandas/__init__.py#line=59)
57 os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
59 from pyspark.pandas.frame import DataFrame
---> 60 from pyspark.pandas.indexes.base import Index
61 from pyspark.pandas.indexes.category import CategoricalIndex
62 from pyspark.pandas.indexes.datetimes import DatetimeIndex
File
\\?\[C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\indexes\__init__.py:17](file:///C:/Users/tonyj/AppData/Roaming/jupyterlab-desktop/envs/env_1/Lib/site-packages/pyspark/pandas/indexes/__init__.py#line=16)
1 #
2 # Licensed to the Apache Software Foundation (ASF) under one or more
3 # contributor license agreements. See the NOTICE file distributed
with
(...)
15 # limitations under the License.
16 #
---> 17 from pyspark.pandas.indexes.base import Index # noqa: F401
18 from pyspark.pandas.indexes.datetimes import DatetimeIndex # noqa:
F401
19 from pyspark.pandas.indexes.multi import MultiIndex # noqa: F401
File
\\?\[C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\indexes\base.py:66](file:///C:/Users/tonyj/AppData/Roaming/jupyterlab-desktop/envs/env_1/Lib/site-packages/pyspark/pandas/indexes/base.py#line=65)
64 from pyspark.pandas.frame import DataFrame
65 from pyspark.pandas.missing.indexes import MissingPandasLikeIndex
---> 66 from pyspark.pandas.series import Series, first_series
67 from pyspark.pandas.spark.accessors import SparkIndexMethods
68 from pyspark.pandas.utils import (
69 is_name_like_tuple,
70 is_name_like_value,
(...)
78 log_advice,
79 )
File
\\?\[C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\series.py:118](file:///C:/Users/tonyj/AppData/Roaming/jupyterlab-desktop/envs/env_1/Lib/site-packages/pyspark/pandas/series.py#line=117)
116 from pyspark.pandas.spark import functions as SF
117 from pyspark.pandas.spark.accessors import SparkSeriesMethods
--> 118 from pyspark.pandas.strings import StringMethods
119 from pyspark.pandas.typedef import (
120 infer_return_type,
121 spark_type_to_pandas_dtype,
(...)
124 create_type_for_series_type,
125 )
126 from pyspark.pandas.typedef.typehints import as_spark_type
File
\\?\[C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\strings.py:44](file:///C:/Users/tonyj/AppData/Roaming/jupyterlab-desktop/envs/env_1/Lib/site-packages/pyspark/pandas/strings.py#line=43)
40 import pyspark.pandas as ps
41 from pyspark.pandas.spark import functions as SF
---> 44 class StringMethods:
45 """String methods for pandas-on-Spark Series"""
47 def __init__(self, series: "ps.Series"):
File
\\?\[C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\strings.py:1332](file:///C:/Users/tonyj/AppData/Roaming/jupyterlab-desktop/envs/env_1/Lib/site-packages/pyspark/pandas/strings.py#line=1331),
in StringMethods()
1328 return s.str.ljust(width, fillchar)
1330 return self._data.pandas_on_spark.transform_batch(pandas_ljust)
-> 1332 def match(self, pat: str, case: bool = True, flags: int = 0, na: Any
= np.NaN) -> "ps.Series":
1333 """
1334 Determine if each string matches a regular expression.
1335
(...)
1390 dtype: object
1391 """
1393 def pandas_match(s) -> ps.Series[bool]: # type:
ignore[no-untyped-def]
File
\\?\[C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\numpy\__init__.py:411](file:///C:/Users/tonyj/AppData/Roaming/jupyterlab-desktop/envs/env_1/Lib/site-packages/numpy/__init__.py#line=410),
in __getattr__(attr)
408 raise AttributeError(__former_attrs__[attr])
410 if attr in __expired_attributes__:
--> 411 raise AttributeError(
412 f"`np.{attr}` was removed in the NumPy 2.0 release. "
413 f"{__expired_attributes__[attr]}"
414 )
416 if attr == "chararray":
417 warnings.warn(
418 "`np.chararray` is deprecated and will be removed from "
419 "the main namespace in the future. Use an array with a
string "
420 "or bytes dtype instead.", DeprecationWarning, stacklevel=2)
AttributeError: `np.NaN` was removed in the NumPy 2.0 release. Use `np.nan`
instead.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]