Repository: spark Updated Branches: refs/heads/master 0169360ef -> e02ac303c
[SPARK-19429][PYTHON][SQL] Support slice arguments in Column.__getitem__

## What changes were proposed in this pull request?

- Add support for `slice` arguments in `Column.__getitem__`.
- Remove obsolete `__getslice__` bindings.

## How was this patch tested?

Existing unit tests, additional tests covering `[]` with `slice`.

Author: zero323 <zero...@users.noreply.github.com>

Closes #16771 from zero323/SPARK-19429.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e02ac303
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e02ac303
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e02ac303

Branch: refs/heads/master
Commit: e02ac303c6356cdf7fffec7361311d828a723afe
Parents: 0169360
Author: zero323 <zero...@users.noreply.github.com>
Authored: Mon Feb 13 15:23:56 2017 -0800
Committer: Holden Karau <hol...@us.ibm.com>
Committed: Mon Feb 13 15:23:56 2017 -0800

----------------------------------------------------------------------
 python/pyspark/sql/column.py | 11 ++++++++---
 python/pyspark/sql/tests.py | 8 ++++++++
 2 files changed, 16 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/e02ac303/python/pyspark/sql/column.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index ec059d6..73c8672 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -180,7 +180,6 @@ class Column(object):
 
     # container operators
     __contains__ = _bin_op("contains")
-    __getitem__ = _bin_op("apply")
 
     # bitwise operators
     bitwiseOR = _bin_op("bitwiseOR")
@@ -236,6 +235,14 @@ class Column(object):
             raise AttributeError(item)
         return self.getField(item)
 
+    def __getitem__(self, k):
+        if isinstance(k, slice):
+            if k.step is not None:
+                raise ValueError("slice with step is not supported.")
+            return self.substr(k.start, k.stop)
+        else:
+            return _bin_op("apply")(self, k)
+
     def __iter__(self):
         raise TypeError("Column is not iterable")
 
@@ -267,8 +274,6 @@ class Column(object):
             raise TypeError("Unexpected type: %s" % type(startPos))
         return Column(jc)
 
-    __getslice__ = substr
-
     @ignore_unicode_prefix
     @since(1.5)
     def isin(self, *cols):

http://git-wip-us.apache.org/repos/asf/spark/blob/e02ac303/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index ab9d3f6..d9d0333 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -874,6 +874,14 @@ class SQLTests(ReusedPySparkTestCase):
         self.assertTrue(all(isinstance(c, Column) for c in css))
         self.assertTrue(isinstance(ci.cast(LongType()), Column))
 
+    def test_column_getitem(self):
+        from pyspark.sql.functions import col
+
+        self.assertIsInstance(col("foo")[1:3], Column)
+        self.assertIsInstance(col("foo")[0], Column)
+        self.assertIsInstance(col("foo")["bar"], Column)
+        self.assertRaises(ValueError, lambda: col("foo")[0:10:2])
+
     def test_column_select(self):
         df = self.df
         self.assertEqual(self.testData, df.select("*").collect())

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org