dtenedor commented on code in PR #42174:
URL: https://github.com/apache/spark/pull/42174#discussion_r1279997463
##########
python/pyspark/sql/tests/test_udtf.py:
##########
@@ -1444,6 +1447,83 @@ def terminate(self):
assertSchemaEqual(df.schema, StructType().add("col1", IntegerType()))
assertDataFrameEqual(df, [Row(col1=10), Row(col1=100)])
+ def test_udtf_call_with_partition_by(self):
+ class TestUDTF:
+ def __init__(self):
+ self._sum = 0
+
+ def eval(self, row: Row):
+ self._sum += row["x"]
+
+ def terminate(self):
+ yield self._sum,
+
+ func = udtf(TestUDTF, returnType="a: int")
+ self.spark.udtf.register("test_udtf_pb", func)
+
+ def actual(query: str) -> str:
+ df = self.spark.sql(query)
+ value = df.collect()[0][0]
+ stripExprIds = re.sub(r'#[\d]+', r'#xx', value)
+ stripPlanIds = re.sub(
+ r'plan_id=[\d]+', r'plan_id=xx', stripExprIds)
+ stripEvalType = re.sub(
+ r'\+- .....EvalPythonUDTF test_udtf_pb.*', r'+- EvalPythonUDTF test_udtf_pb',
+ stripPlanIds)
+ print('Query plan: ' + stripEvalType)
+ return stripEvalType.strip('\n')
+ def expected(input: str) -> str:
+ return textwrap.dedent(input).strip('\n')
+
+ self.assertEqual(
Review Comment:
Done.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]