df:
---------
a|b|c
---------
1|m|n
1|x|j
2|m|x
...
import pyspark.sql.functions as F
from pyspark.sql.types import MapType, StringType


def _my_zip(c, d):
    """Pair up two positionally aligned lists into a dict.

    Args:
        c: Collected values used as the map keys.
        d: Collected values used as the map values.

    Returns:
        A dict mapping each element of ``c`` to the element of ``d`` at the
        same position. ``zip`` stops at the shorter list, and a later
        duplicate key overwrites an earlier one.
    """
    return dict(zip(c, d))


# Wrap the plain Python function as a Spark UDF that returns
# map<string, string>; the third MapType argument (valueContainsNull=True)
# allows null values inside the map.
my_zip = F.udf(_my_zip, MapType(StringType(), StringType(), True))

# One row per distinct 'a', with a map built from that group's 'b' -> 'c'.
# Column names follow the sample frame (a|b|c); adjust if yours differ.
# NOTE(review): this relies on both collect_list calls returning values in
# the same row order and with the same length; rows with a null in only one
# of the two columns could presumably misalign the lists -- confirm for
# your data, or collect a struct of both columns instead.
df.groupBy('a').agg(
    my_zip(F.collect_list('b'), F.collect_list('c')).alias('named_list')
)

# --
# View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/udf-of-aggregation-in-pyspark-dataframe-tp27811p27814.html
# Sent from the Apache Spark User List mailing list archive at Nabble.com.
# ---------------------------------------------------------------------
# To unsubscribe e-mail: user-unsubscr...@spark.apache.org