itholic commented on code in PR #45377:
URL: https://github.com/apache/spark/pull/45377#discussion_r1549469336
##########
python/pyspark/errors/utils.py:
##########
@@ -119,3 +124,61 @@ def get_message_template(self, error_class: str) -> str:
message_template = main_message_template + " " +
sub_message_template
return message_template
+
+
+def _capture_call_site(fragment: str) -> None:
+ """
+ Capture the call site information including file name, line number, and
function name.
+
+ This function updates the thread-local storage from server side
(PySparkCurrentOrigin)
+ with the current call site information when a PySpark API function is
called.
+
+ Parameters
+ ----------
+ func_name : str
+ The name of the PySpark API function being captured.
+
+ Notes
+ -----
+ The call site information is used to enhance error messages with the exact
location
+ in the user code that led to the error.
+ """
+ from pyspark.sql.session import SparkSession
+
+ spark = SparkSession._getActiveSessionOrCreate()
+ assert spark._jvm is not None
+
+ stack = inspect.stack()
+ frame_info = stack[-1]
+ filename = frame_info.filename
+ lineno = frame_info.lineno
+ call_site = f"{filename}:{lineno}"
+
+ pyspark_origin =
spark._jvm.org.apache.spark.sql.catalyst.trees.PySparkCurrentOrigin
+ pyspark_origin.set(fragment, call_site)
+
+
+def with_origin(func: Callable[..., Any]) -> Callable[..., Any]:
Review Comment:
Actually, this was initially designed to be applied to individual methods, but
let me make it private since we don't have any use case for that right now.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]