ueshin commented on a change in pull request #32738:
URL: https://github.com/apache/spark/pull/32738#discussion_r644396086
##########
File path: python/pyspark/pandas/indexing.py
##########
@@ -71,27 +71,27 @@ def __init__(self, psdf_or_psser):
self._psdf_or_psser = psdf_or_psser
@property
- def _is_df(self):
+ def _is_df(self) -> bool:
from pyspark.pandas.frame import DataFrame
return isinstance(self._psdf_or_psser, DataFrame)
@property
- def _is_series(self):
+ def _is_series(self) -> bool:
from pyspark.pandas.series import Series
return isinstance(self._psdf_or_psser, Series)
@property
- def _psdf(self):
+ def _psdf(self) -> "DataFrame":
if self._is_df:
- return self._psdf_or_psser
+ return self._psdf_or_psser # type: ignore
Review comment:
Could you use `cast` instead of `ignore`?
##########
File path: python/pyspark/pandas/indexing.py
##########
@@ -514,7 +514,7 @@ def __getitem__(self, key) -> Union["Series", "DataFrame"]:
except AnalysisException:
raise KeyError(
"[{}] don't exist in columns".format(
- [col._jc.toString() for col in data_spark_columns]
+ [col._jc.toString() for col in data_spark_columns] #
type: ignore
Review comment:
ditto.
##########
File path: python/pyspark/pandas/indexing.py
##########
@@ -608,7 +608,9 @@ def __setitem__(self, key, value):
if cond is None:
cond = F.lit(True)
if limit is not None:
- cond = cond & (self._internal.spark_frame[self._sequence_col]
< F.lit(limit))
+ cond = cond & (
+ self._internal.
+ spark_frame[self._sequence_col] < F.lit(limit)) # type:
ignore
Review comment:
ditto.
##########
File path: python/pyspark/pandas/indexing.py
##########
@@ -174,7 +174,7 @@ def __getitem__(self, key) -> Union["Series", "DataFrame",
Scalar]:
if len(pdf) < 1:
raise KeyError(name_like_string(row_sel))
- values = pdf.iloc[:, 0].values
+ values = pdf.iloc[:, 0].values # type: ignore
Review comment:
I guess `values = cast(pd.DataFrame, pdf).iloc[:, 0].values`?
or we should cast right after `toPandas()` above?
##########
File path: python/pyspark/pandas/generic.py
##########
@@ -3064,25 +3064,25 @@ def ffill(self, axis=None, inplace=False, limit=None)
-> Union["DataFrame", "Ser
@property
def at(self) -> AtIndexer:
- return AtIndexer(self)
+ return AtIndexer(self) # type: ignore
Review comment:
Do we need the `ignore`? What happens without it?
##########
File path: python/pyspark/pandas/indexing.py
##########
@@ -445,7 +445,7 @@ def __getitem__(self, key) -> Union["Series", "DataFrame"]:
if isinstance(rows_sel, Series) and not same_anchor(rows_sel,
self._psdf_or_psser):
psdf = self._psdf_or_psser.copy()
- temp_col = verify_temp_column_name(psdf, "__temp_col__")
+ temp_col = verify_temp_column_name(psdf, "__temp_col__") #
type: ignore
Review comment:
What happens without `ignore`?
##########
File path: python/pyspark/pandas/indexing.py
##########
@@ -685,13 +691,15 @@ def __setitem__(self, key, value):
return
cond, limit, remaining_index = self._select_rows(rows_sel)
- missing_keys = []
+ missing_keys: Optional[List[Tuple]] = []
Review comment:
Shall we use a comment style annotation for now?
```py
... = [] # type: Optional[List[Tuple]]
```
##########
File path: python/pyspark/pandas/indexing.py
##########
@@ -1138,7 +1146,8 @@ def _select_rows_else(
)
def _get_from_multiindex_column(
- self, key, missing_keys, labels=None, recursed=0
+ self, key: Optional[Tuple], missing_keys: Optional[List[Tuple]],
+ labels: Optional[List[Tuple]] = None, recursed: int = 0
Review comment:
The argument-wrapping style here seems inconsistent — shall we put each parameter on its own line instead?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]