ueshin commented on code in PR #41948:
URL: https://github.com/apache/spark/pull/41948#discussion_r1264182071


##########
python/pyspark/sql/tests/test_udtf.py:
##########
@@ -719,6 +726,153 @@ def terminate(self):
         self.assertIn("Evaluate the input row", cls.eval.__doc__)
         self.assertIn("Terminate the UDTF", cls.terminate.__doc__)
 
+    def test_simple_udtf_with_analyze(self):
+        class TestUDTF:
+            @staticmethod
+            def analyze() -> StructType:
+                return StructType().add("c1", StringType()).add("c2", StringType())
+
+            def eval(self):
+                yield "hello", "world"
+
+        func = udtf(TestUDTF)
+        rows = func().collect()
+        self.assertEqual(rows, [Row(c1="hello", c2="world")])
+
+    def test_udtf_with_analyze(self):
+        class TestUDTF:
+            @staticmethod
+            def analyze(a) -> StructType:
+                assert isinstance(a, dict)
+                assert isinstance(a["data_type"], DataType)
+                assert a["value"] is not None
+                assert a["is_table"] is False
+                return StructType().add("a", a["data_type"])
+
+            def eval(self, a):
+                yield a,
+
+        func = udtf(TestUDTF)
+
+        df1 = func(lit(1))
+        self.assertEquals(df1.schema, StructType().add("a", IntegerType()))
+        self.assertEqual(df1.collect(), [Row(a=1)])
+
+        df2 = func(lit("x"))
+        self.assertEquals(df2.schema, StructType().add("a", StringType()))
+        self.assertEqual(df2.collect(), [Row(a="x")])
+
+    def test_udtf_with_analyze_multiple_arguments(self):
+        class TestUDTF:
+            @staticmethod
+            def analyze(a, b) -> StructType:
+                return StructType().add("a", a["data_type"]).add("b", b["data_type"])
+
+            def eval(self, a, b):
+                yield a, b
+
+        func = udtf(TestUDTF)
+
+        df = func(lit(1), lit("x"))
+        self.assertEquals(df.schema, StructType().add("a", IntegerType()).add("b", StringType()))
+        self.assertEqual(df.collect(), [Row(a=1, b="x")])
+
+    def test_udtf_with_analyze_table_argument(self):
+        class TestUDTF:
+            @staticmethod
+            def analyze(a) -> StructType:
+                assert isinstance(a, dict)
+                assert isinstance(a["data_type"], StructType)
+                assert a["value"] is None
+                assert a["is_table"] is True
+                return StructType().add("a", a["data_type"][0].dataType)
+
+            def eval(self, a: Row):
+                if a["id"] > 5:
+                    yield a["id"],
+
+        func = udtf(TestUDTF)
+        self.spark.udtf.register("test_udtf", func)
+
+        df = self.spark.sql("SELECT * FROM test_udtf(TABLE (SELECT id FROM range(0, 8)))")
+        self.assertEqual(df.schema, StructType().add("a", LongType()))
+        self.assertEqual(df.collect(), [Row(a=6), Row(a=7)])
+
+    def test_udtf_with_neither_return_type_nor_analyze(self):
+        class TestUDTF:
+            def eval(self):
+                yield "hello", "world"
+
+        with self.assertRaises(PySparkAttributeError) as e:
+            udtf(TestUDTF)
+
+        self.check_error(
+            exception=e.exception,
+            error_class="INVALID_UDTF_RETURN_TYPE",
+            message_parameters={"name": "TestUDTF"},
+        )
+
+    def test_udtf_with_non_static_analyze(self):
+        class TestUDTF:
+            def analyze(self) -> StructType:
+                return StructType().add("c1", StringType()).add("c2", StringType())
+
+            def eval(self):
+                yield "hello", "world"
+
+        with self.assertRaises(PySparkAttributeError) as e:
+            udtf(TestUDTF)
+
+        self.check_error(
+            exception=e.exception,
+            error_class="INVALID_UDTF_RETURN_TYPE",
+            message_parameters={"name": "TestUDTF"},
+        )
+
+    def test_udtf_with_analyze_returning_non_struct(self):

Review Comment:
   Updated/added:
   - test_udtf_with_analyze
   - test_udtf_with_analyze_table_argument_adding_columns
   - test_udtf_with_analyze_table_argument_repeating_rows
   - test_udtf_with_analyze_raising_an_exception
   - test_udtf_with_analyze_null_literal
   - test_udtf_with_analyze_unknown_key



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to