This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 9a004ae [SPARK-36265][PYTHON] Use __getitem__ instead of getItem to
suppress warnings
9a004ae is described below
commit 9a004ae12d0fe5038e11a10586a032dcd574f08f
Author: Takuya UESHIN <[email protected]>
AuthorDate: Fri Jul 23 11:27:31 2021 +0900
[SPARK-36265][PYTHON] Use __getitem__ instead of getItem to suppress
warnings
### What changes were proposed in this pull request?
Use `Column.__getitem__` instead of `Column.getItem` to suppress warnings.
### Why are the changes needed?
In pandas API on Spark code base, there are some places using
`Column.getItem` with `Column` object, but it shows a deprecation warning.
### Does this PR introduce _any_ user-facing change?
Yes, users won't see the warnings anymore.
- before
```py
>>> s = ps.Series(list("abbccc"), dtype="category")
>>> s.astype(str)
/path/to/spark/python/pyspark/sql/column.py:322: FutureWarning: A column as
'key' in getItem is deprecated as of Spark 3.0, and will not be supported in
the future release. Use `column[key]` or `column.key` syntax instead.
warnings.warn(
0 a
1 b
2 b
3 c
4 c
5 c
dtype: object
```
- after
```py
>>> s = ps.Series(list("abbccc"), dtype="category")
>>> s.astype(str)
0 a
1 b
2 b
3 c
4 c
5 c
dtype: object
```
### How was this patch tested?
Existing tests.
Closes #33486 from ueshin/issues/SPARK-36265/getitem.
Authored-by: Takuya UESHIN <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
(cherry picked from commit a76a087f7f3ed734426a8842b6f2e4d13d080399)
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/pandas/base.py | 4 ++--
python/pyspark/pandas/data_type_ops/base.py | 2 +-
python/pyspark/pandas/data_type_ops/categorical_ops.py | 2 +-
python/pyspark/pandas/frame.py | 2 +-
4 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py
index ff17fdf..f547f71 100644
--- a/python/pyspark/pandas/base.py
+++ b/python/pyspark/pandas/base.py
@@ -1580,7 +1580,7 @@ class IndexOpsMixin(object, metaclass=ABCMeta):
)
)
map_scol = F.create_map(*kvs)
- scol = map_scol.getItem(self.spark.column)
+ scol = map_scol[self.spark.column]
codes, uniques = self._with_new_scol(
scol.alias(self._internal.data_spark_column_names[0])
).factorize(na_sentinel=na_sentinel)
@@ -1636,7 +1636,7 @@ class IndexOpsMixin(object, metaclass=ABCMeta):
map_scol = F.create_map(*kvs)
null_scol = F.when(cond, SF.lit(na_sentinel_code))
- new_scol = null_scol.otherwise(map_scol.getItem(scol))
+ new_scol = null_scol.otherwise(map_scol[scol])
codes = self._with_new_scol(new_scol.alias(self._internal.data_spark_column_names[0]))
diff --git a/python/pyspark/pandas/data_type_ops/base.py b/python/pyspark/pandas/data_type_ops/base.py
index d0e1317..7eb2a95 100644
--- a/python/pyspark/pandas/data_type_ops/base.py
+++ b/python/pyspark/pandas/data_type_ops/base.py
@@ -128,7 +128,7 @@ def _as_categorical_type(
)
map_scol = F.create_map(*kvs)
- scol = F.coalesce(map_scol.getItem(index_ops.spark.column), SF.lit(-1))
+ scol = F.coalesce(map_scol[index_ops.spark.column], SF.lit(-1))
return index_ops._with_new_scol(
scol.cast(spark_type),
field=index_ops._internal.data_fields[0].copy(
diff --git a/python/pyspark/pandas/data_type_ops/categorical_ops.py b/python/pyspark/pandas/data_type_ops/categorical_ops.py
index 5b02ae0..932b9ed 100644
--- a/python/pyspark/pandas/data_type_ops/categorical_ops.py
+++ b/python/pyspark/pandas/data_type_ops/categorical_ops.py
@@ -69,7 +69,7 @@ class CategoricalOps(DataTypeOps):
*[(SF.lit(code), SF.lit(category)) for code, category in enumerate(categories)]
)
map_scol = F.create_map(*kvs)
- scol = map_scol.getItem(index_ops.spark.column)
+ scol = map_scol[index_ops.spark.column]
return index_ops._with_new_scol(scol).astype(dtype)
def eq(self, left: IndexOpsLike, right: Any) -> SeriesOrIndex:
diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py
index ab13e9a..faacd7a 100644
--- a/python/pyspark/pandas/frame.py
+++ b/python/pyspark/pandas/frame.py
@@ -10854,7 +10854,7 @@ defaultdict(<class 'list'>, {'col..., 'col...})]
for column in percentile_col_names:
cols_dict[column] = list()
for i in range(len(qq)):
- cols_dict[column].append(scol_for(sdf, column).getItem(i).alias(column))
+ cols_dict[column].append(scol_for(sdf, column)[i].alias(column))
internal_index_column = SPARK_DEFAULT_INDEX_NAME
cols = []
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]