Repository: spark
Updated Branches:
  refs/heads/master 42b9eda80 -> 966083105

[SPARK-21712][PYSPARK] Clarify type error for Column.substr()

Proposed changes:

* Clarify the type error that `Column.substr()` gives.

Test plan:

* Tested this manually.
* Test code:

    ```python
    from pyspark.sql.functions import col, lit
    spark.createDataFrame([['nick']], schema=['name']).select(col('name').substr(0, lit(1)))
    ```

* Before:

    ```
    TypeError: Can not mix the type
    ```

* After:

    ```
    TypeError: startPos and length must be the same type. Got <class 'int'> and <class 'pyspark.sql.column.Column'>, respectively.
    ```

Author: Nicholas Chammas <nicholas.cham...@gmail.com>

Closes #18926 from nchammas/SPARK-21712-substr-type-error.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/96608310
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/96608310
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/96608310

Branch: refs/heads/master
Commit: 96608310501a43fa4ab9f2697f202d655dba98c5
Parents: 42b9eda
Author: Nicholas Chammas <nicholas.cham...@gmail.com>
Authored: Wed Aug 16 11:19:15 2017 +0900
Committer: hyukjinkwon <gurwls...@gmail.com>
Committed: Wed Aug 16 11:19:15 2017 +0900

----------------------------------------------------------------------
 python/pyspark/sql/column.py | 10 ++++++++--
 python/pyspark/sql/tests.py  | 12 ++++++++++++
 2 files changed, 20 insertions(+), 2 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/spark/blob/96608310/python/pyspark/sql/column.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/column.py b/python/pyspark/sql/column.py
index e753ed4..b172f38 100644
--- a/python/pyspark/sql/column.py
+++ b/python/pyspark/sql/column.py
@@ -406,8 +406,14 @@ class Column(object):
         [Row(col=u'Ali'), Row(col=u'Bob')]
         """
         if type(startPos) != type(length):
-            raise TypeError("Can not mix the type")
-        if isinstance(startPos, (int, long)):
+            raise TypeError(
+                "startPos and length must be the same type. "
+                "Got {startPos_t} and {length_t}, respectively."
+                .format(
+                    startPos_t=type(startPos),
+                    length_t=type(length),
+                ))
+        if isinstance(startPos, int):
             jc = self._jc.substr(startPos, length)
         elif isinstance(startPos, Column):
             jc = self._jc.substr(startPos._jc, length._jc)

http://git-wip-us.apache.org/repos/asf/spark/blob/96608310/python/pyspark/sql/tests.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py
index cf2c473..45a3f9e 100644
--- a/python/pyspark/sql/tests.py
+++ b/python/pyspark/sql/tests.py
@@ -1220,6 +1220,18 @@ class SQLTests(ReusedPySparkTestCase):
         rndn2 = df.select('key', functions.randn(0)).collect()
         self.assertEqual(sorted(rndn1), sorted(rndn2))
 
+    def test_string_functions(self):
+        from pyspark.sql.functions import col, lit
+        df = self.spark.createDataFrame([['nick']], schema=['name'])
+        self.assertRaisesRegexp(
+            TypeError,
+            "must be the same type",
+            lambda: df.select(col('name').substr(0, lit(1))))
+        if sys.version_info.major == 2:
+            self.assertRaises(
+                TypeError,
+                lambda: df.select(col('name').substr(long(0), long(1))))
+
     def test_array_contains_function(self):
         from pyspark.sql.functions import array_contains
 

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org