HyukjinKwon commented on code in PR #39739: URL: https://github.com/apache/spark/pull/39739#discussion_r1086538692
########## python/pyspark/sql/connect/udf.py: ########## @@ -90,7 +91,25 @@ def __init__( ) self.func = func - self._returnType = returnType + + if isinstance(returnType, str): + from pyspark.sql import SparkSession as PySparkSession + + # Currently we don't have a way to have a current Spark session in Spark Connect, and + # pyspark.sql.SparkSession has a centralized logic to control the session creation. + # So uses pyspark.sql.SparkSession for now. Should replace this to using the current + # Spark session for Spark Connect in the future. + assert is_remote() + + self._returnType = ( # a workaround to parse the DataType from DDL strings + PySparkSession.builder.remote() + .getOrCreate() + .createDataFrame(data=[], schema=returnType) + .schema.fields[0] Review Comment: Can we add another assert to throw an error if there is more than one field? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org