Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20390#discussion_r163756176 --- Diff: python/pyspark/sql/dataframe.py --- @@ -1881,6 +1881,28 @@ def toDF(self, *cols): jdf = self._jdf.toDF(self._jseq(cols)) return DataFrame(jdf, self.sql_ctx) + @since(2.4) + def colRegex(self, colName): + """ + Selects column based on the column name specified as a regex and return it + as :class:`Column`. + + :param colName: string, column name specified as a regex. + + >>> df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)]) + >>> df.select(df.colRegex("`(_1)?+.+`")).show() + +---+ + | _2| + +---+ + | 1| + | 2| + | 3| + +---+ + """ + assert isinstance(colName, basestring), "colName should be a string" --- End diff -- I think raising a `TypeError` from an explicit `if` check would be more correct here than using `assert`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org