ueshin commented on code in PR #52689:
URL: https://github.com/apache/spark/pull/52689#discussion_r2450282483
##########
python/pyspark/sql/tests/test_udf.py:
##########
@@ -1551,6 +1554,98 @@ def check_struct_binary_type(s):
         expected = self.spark.createDataFrame([Row(type_name=expected_type)])
         assertDataFrameEqual(result, expected)
+    @unittest.skipIf(is_remote_only(), "Requires JVM access")
+    def test_udf_with_logging(self):
+        @udf
+        def my_udf():
+            logger = logging.getLogger("test")
+            print("print to stdout ❤", file=sys.stdout)
+            print("print to stderr 😀", file=sys.stderr)
+            try:
+                1 / 0
+            except Exception:
+                logger.exception("exception")
+            return "x"
+
+        # Logging is disabled by default
+        assertDataFrameEqual(
+            self.spark.range(1).select(my_udf().alias("result")), [Row(result="x")]
+        )
+        self.assertEqual(self.spark.table("system.session.python_worker_logs").count(), 0)
+
+        with self.sql_conf({"spark.sql.pyspark.worker.logging.enabled": "true"}):
+            assertDataFrameEqual(
+                self.spark.range(1).select(my_udf().alias("result")), [Row(result="x")]
+            )
+
+            logs = self.spark.table("system.session.python_worker_logs")
+
+            assertDataFrameEqual(
+                logs.select("level", "msg", "context", "logger"),
+                [
+                    Row(
+                        level="INFO",
+                        msg="print to stdout ❤",
+                        context={"func_name": my_udf.__name__},
+                        logger="stdout",
+                    ),
+                    Row(
+                        level="ERROR",
+                        msg="print to stderr 😀",
+                        context={"func_name": my_udf.__name__},
+                        logger="stderr",
+                    ),
+                    Row(
+                        level="ERROR",
+                        msg="exception",
+                        context={"func_name": my_udf.__name__},
+                        logger="test",
+                    ),
+                ],
+            )
+
+            self.assertEqual(logs.where("exception is not null").select("exception").count(), 1)
+
+    @unittest.skipIf(is_remote_only(), "Requires JVM access")
Review Comment:
It's necessary to clean up the logs for each test, as is done here:
https://github.com/apache/spark/blob/0eed72644332e49de682b31cbcbfe13ff4931756/python/pyspark/testing/connectutils.py#L199-L202
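
To make the suggestion concrete, here is a minimal sketch of per-test cleanup; `_clear_worker_logs` is a hypothetical hook, and the cleanup the Connect test base actually performs is at the connectutils.py lines linked above:

```python
class PythonWorkerLoggingCleanupMixin:
    """Sketch only: clear the worker logs around each test so that
    count-based assertions against system.session.python_worker_logs
    are not polluted by logs left over from earlier tests."""

    def setUp(self):
        super().setUp()
        self._clear_worker_logs()

    def tearDown(self):
        self._clear_worker_logs()
        super().tearDown()

    def _clear_worker_logs(self):
        # Hypothetical hook: the concrete call depends on session
        # internals, so this sketch leaves it abstract.
        raise NotImplementedError
```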
##########
python/pyspark/sql/tests/test_udf.py:
##########
@@ -1551,6 +1554,98 @@ def check_struct_binary_type(s):
         expected = self.spark.createDataFrame([Row(type_name=expected_type)])
         assertDataFrameEqual(result, expected)
+    @unittest.skipIf(is_remote_only(), "Requires JVM access")
+    def test_udf_with_logging(self):
+        @udf
+        def my_udf():
+            logger = logging.getLogger("test")
+            print("print to stdout ❤", file=sys.stdout)
+            print("print to stderr 😀", file=sys.stderr)
+            try:
+                1 / 0
+            except Exception:
+                logger.exception("exception")
+            return "x"
+
+        # Logging is disabled by default
+        assertDataFrameEqual(
+            self.spark.range(1).select(my_udf().alias("result")), [Row(result="x")]
+        )
+        self.assertEqual(self.spark.table("system.session.python_worker_logs").count(), 0)
+
+        with self.sql_conf({"spark.sql.pyspark.worker.logging.enabled": "true"}):
+            assertDataFrameEqual(
+                self.spark.range(1).select(my_udf().alias("result")), [Row(result="x")]
+            )
+
+            logs = self.spark.table("system.session.python_worker_logs")
Review Comment:
So far I don't have a plan to make it configurable, but I think we can if necessary.
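
For illustration only, if the table name did become configurable, reading the logs might look like the sketch below; `spark.sql.pyspark.worker.logging.tableName` is a made-up conf key (today the name is fixed), and `spark` is assumed to be an active SparkSession:

```python
# Hypothetical conf key: "spark.sql.pyspark.worker.logging.tableName" does not
# exist today; the table name is fixed to system.session.python_worker_logs,
# which is used here as the default.
table_name = spark.conf.get(
    "spark.sql.pyspark.worker.logging.tableName",
    "system.session.python_worker_logs",
)
logs = spark.table(table_name)
logs.select("level", "msg", "logger").show(truncate=False)
```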
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]