GitHub user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21650#discussion_r205024958
--- Diff: python/pyspark/sql/tests.py ---
@@ -5060,6 +5049,147 @@ def test_type_annotation(self):
df = self.spark.range(1).select(pandas_udf(f=_locals['noop'],
returnType='bigint')('id'))
self.assertEqual(df.first()[0], 0)
+ def test_mixed_udf(self):
+ import pandas as pd
+ from pyspark.sql.functions import col, udf, pandas_udf
+
+ df = self.spark.range(0, 1).toDF('v')
+
+ # Test mixture of multiple UDFs and Pandas UDFs
+
+ @udf('int')
+ def f1(x):
+ assert type(x) == int
+ return x + 1
+
+ @pandas_udf('int')
+ def f2(x):
+ assert type(x) == pd.Series
+ return x + 10
+
+ @udf('int')
+ def f3(x):
+ assert type(x) == int
+ return x + 100
+
+ @pandas_udf('int')
+ def f4(x):
+ assert type(x) == pd.Series
+ return x + 1000
+
+ # Test mixed udfs in a single projection
+ df1 = df \
+ .withColumn('f1', f1(col('v'))) \
+ .withColumn('f2', f2(col('v'))) \
+ .withColumn('f3', f3(col('v'))) \
+ .withColumn('f4', f4(col('v'))) \
+ .withColumn('f2_f1', f2(col('f1'))) \
+ .withColumn('f3_f1', f3(col('f1'))) \
--- End diff --
This looks like it is testing udf + udf
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]