zhengruifeng commented on code in PR #52001:
URL: https://github.com/apache/spark/pull/52001#discussion_r2303766929


##########
python/pyspark/sql/tests/test_udtf.py:
##########
@@ -3490,39 +3490,38 @@ def eval(self):
                     udtf(TestUDTF, returnType=ret_type)().collect()
 
 
-def test_udtf_with_collated_string_types(self):
-    @udtf(
-        "out1 string, out2 string collate UTF8_BINARY, out3 string collate UTF8_LCASE,"
-        " out4 string collate UNICODE"
-    )
-    class MyUDTF:
-        def eval(self, v1, v2, v3, v4):
-            yield (v1 + "1", v2 + "2", v3 + "3", v4 + "4")
-
-    schema = StructType(
-        [
-            StructField("col1", StringType(), True),
-            StructField("col2", StringType("UTF8_BINARY"), True),
-            StructField("col3", StringType("UTF8_LCASE"), True),
-            StructField("col4", StringType("UNICODE"), True),
+    def test_udtf_with_collated_string_types(self):
+        class TestCollatedUDTF:
+            def eval(self, v1, v2, v3, v4):
+                yield (v1 + "1", v2 + "2", v3 + "3", v4 + "4")
+
+        func = udtf(
+            TestCollatedUDTF,
+            returnType="out1 string, out2 string collate UTF8_BINARY,"
+                       " out3 string collate UTF8_LCASE, out4 string collate UNICODE"
+        )
+        self.spark.udtf.register("test_collated_udtf", func)
+
+        result_df = self.spark.sql(
+            """
+               SELECT * FROM test_collated_udtf(
+                       CAST('hello' AS STRING),
+                       CAST('hello' AS STRING COLLATE UTF8_BINARY),
+                       CAST('hello' AS STRING COLLATE UTF8_LCASE),
+                       CAST('hello' AS STRING COLLATE UNICODE))
+            """)
+
+        expected_row = ("hello1", "hello2", "hello3", "hello4")
+        self.assertEqual(result_df.collect()[0], expected_row)
+
+        expected_output_types = [
+            StringType(),
+            StringType("UTF8_BINARY"),
+            StringType("UTF8_LCASE"),
+            StringType("UNICODE"),
         ]
-    )
-    df = self.spark.createDataFrame([("hello",) * 4], schema=schema)
-
-    df_out = df.select(MyUDTF(df.col1, df.col2, df.col3, df.col4).alias("out"))

Review Comment:
   If we use the DataFrame API, the query would look like:
   ```
   df_out = df.lateralJoin(
       MyUDTF(
           col("col1").outer(),
           col("col2").outer(),
           col("col3").outer(),
           col("col4").outer(),
       )
   ).select("out1", "out2", "out3", "out4")
   
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to