Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20858#discussion_r177278880
--- Diff: python/pyspark/sql/functions.py ---
@@ -1834,6 +1819,25 @@ def array_contains(col, value):
return Column(sc._jvm.functions.array_contains(_to_java_column(col), value))
+@since(1.5)
+@ignore_unicode_prefix
+def concat(*cols):
+ """
+ Concatenates multiple input columns together into a single column.
+ The function works with strings, binary columns and arrays of the same type.
+
+ >>> df = spark.createDataFrame([('abcd','123')], ['s', 'd'])
+ >>> df.select(concat(df.s, df.d).alias('s')).collect()
+ [Row(s=u'abcd123')]
+
+ >>> df = spark.createDataFrame([([1, 2], [3, 4], [5]), ([1, 2], None, [3])], ['a', 'b', 'c'])
+ >>> df.select(concat(df.a, df.b, df.c).alias("arr")).collect()
+ [Row(arr=[1, 2, 3, 4, 5]), Row(arr=None)]
+ """
+ sc = SparkContext._active_spark_context
+ return Column(sc._jvm.functions.concat(_to_seq(sc, cols, _to_java_column)))
--- End diff ---
Why did we move this down .. ?
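
Side note: the docstring says concat also works with binary columns, but neither doctest above exercises that case. A minimal doctest-style sketch of what that could look like, assuming the same doctest globals (`spark`, `concat`) and that binary concatenation returns a bytearray; the column names and data here are illustrative only, not part of the patch:

    >>> df = spark.createDataFrame([(bytearray(b'ab'), bytearray(b'cd'))], ['x', 'y'])
    >>> df.select(concat(df.x, df.y).alias('b')).collect()  # binary + binary
    [Row(b=bytearray(b'abcd'))]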