anthonycroft commented on PR #47175:
URL: https://github.com/apache/spark/pull/47175#issuecomment-2480556404

   How do we get around this in JupyterLab? As of the time of writing, its default environment ships numpy==2.0.2 and pyspark==3.5.3.
   
   JupyterLab Desktop manages its environment internally (in a Docker container, I believe), so there is no obvious way to downgrade packages.
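   
   One workaround that avoids downgrading anything: pyspark 3.5.x only evaluates the removed `np.NaN` alias at import time (see the traceback below), so restoring the alias before importing pyspark.pandas unblocks the import. A minimal sketch, assuming a module-level monkeypatch is acceptable in your notebook:
   
       import numpy as np
   
       # NumPy 2.0 removed the `np.NaN` alias; re-adding it as a real module
       # attribute bypasses numpy.__getattr__, which is what raises the
       # AttributeError below.
       if not hasattr(np, "NaN"):  # only needed on NumPy >= 2.0
           np.NaN = np.nan
   
       import pyspark.pandas as ps  # pyspark.pandas.strings now imports cleanly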
   
   Full traceback:
   
   ---------------------------------------------------------------------------
   AttributeError                            Traceback (most recent call last)
   Cell In[8], line 3
         1 import pandas as pd
         2 import numpy as np
   ----> 3 import pyspark.pandas as ps
         4 from pyspark.sql import SparkSession
         6 os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
   
   File C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\__init__.py:60
        57     os.environ["PYARROW_IGNORE_TIMEZONE"] = "1"
        59 from pyspark.pandas.frame import DataFrame
   ---> 60 from pyspark.pandas.indexes.base import Index
        61 from pyspark.pandas.indexes.category import CategoricalIndex
        62 from pyspark.pandas.indexes.datetimes import DatetimeIndex
   
   File C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\indexes\__init__.py:17
         1 #
         2 # Licensed to the Apache Software Foundation (ASF) under one or more
         3 # contributor license agreements.  See the NOTICE file distributed with
      (...)
        15 # limitations under the License.
        16 #
   ---> 17 from pyspark.pandas.indexes.base import Index  # noqa: F401
         18 from pyspark.pandas.indexes.datetimes import DatetimeIndex  # noqa: F401
        19 from pyspark.pandas.indexes.multi import MultiIndex  # noqa: F401
   
   File C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\indexes\base.py:66
        64 from pyspark.pandas.frame import DataFrame
        65 from pyspark.pandas.missing.indexes import MissingPandasLikeIndex
   ---> 66 from pyspark.pandas.series import Series, first_series
        67 from pyspark.pandas.spark.accessors import SparkIndexMethods
        68 from pyspark.pandas.utils import (
        69     is_name_like_tuple,
        70     is_name_like_value,
      (...)
        78     log_advice,
        79 )
   
   File C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\series.py:118
       116 from pyspark.pandas.spark import functions as SF
       117 from pyspark.pandas.spark.accessors import SparkSeriesMethods
   --> 118 from pyspark.pandas.strings import StringMethods
       119 from pyspark.pandas.typedef import (
       120     infer_return_type,
       121     spark_type_to_pandas_dtype,
      (...)
       124     create_type_for_series_type,
       125 )
       126 from pyspark.pandas.typedef.typehints import as_spark_type
   
   File C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\strings.py:44
        40 import pyspark.pandas as ps
        41 from pyspark.pandas.spark import functions as SF
   ---> 44 class StringMethods:
        45     """String methods for pandas-on-Spark Series"""
        47     def __init__(self, series: "ps.Series"):
   
   File C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\pyspark\pandas\strings.py:1332, in StringMethods()
      1328         return s.str.ljust(width, fillchar)
      1330     return self._data.pandas_on_spark.transform_batch(pandas_ljust)
    -> 1332 def match(self, pat: str, case: bool = True, flags: int = 0, na: Any = np.NaN) -> "ps.Series":
      1333     """
      1334     Determine if each string matches a regular expression.
      1335 
      (...)
      1390     dtype: object
      1391     """
       1393     def pandas_match(s) -> ps.Series[bool]:  # type: ignore[no-untyped-def]
   
   File C:\Users\tonyj\AppData\Roaming\jupyterlab-desktop\envs\env_1\Lib\site-packages\numpy\__init__.py:411, in __getattr__(attr)
       408     raise AttributeError(__former_attrs__[attr])
       410 if attr in __expired_attributes__:
   --> 411     raise AttributeError(
       412         f"`np.{attr}` was removed in the NumPy 2.0 release. "
       413         f"{__expired_attributes__[attr]}"
       414     )
       416 if attr == "chararray":
       417     warnings.warn(
       418         "`np.chararray` is deprecated and will be removed from "
        419         "the main namespace in the future. Use an array with a string "
       420         "or bytes dtype instead.", DeprecationWarning, stacklevel=2)
   
   AttributeError: `np.NaN` was removed in the NumPy 2.0 release. Use `np.nan` instead.
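   
   Alternatively, if the environment does accept package installs from a notebook cell (an assumption on my part; I have not confirmed what jupyterlab-desktop permits), pinning NumPy below 2.0 would sidestep the removal entirely:
   
       %pip install "numpy<2"
       # then restart the kernel so the pinned version is picked up
   
   Longer term, since this PR addresses the `np.NaN` usage on the Spark side, upgrading to a pyspark release that includes it should make either workaround unnecessary.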

