HyukjinKwon commented on code in PR #45377:
URL: https://github.com/apache/spark/pull/45377#discussion_r1549242008
##########
python/pyspark/errors/utils.py:
##########
@@ -119,3 +124,61 @@ def get_message_template(self, error_class: str) -> str:
message_template = main_message_template + " " +
sub_message_template
return message_template
+
+
+def _capture_call_site(fragment: str) -> None:
+ """
+ Capture the call site information including file name, line number, and
function name.
+
+ This function updates the thread-local storage from server side
(PySparkCurrentOrigin)
+ with the current call site information when a PySpark API function is
called.
+
+ Parameters
+ ----------
+ func_name : str
+ The name of the PySpark API function being captured.
+
+ Notes
+ -----
+ The call site information is used to enhance error messages with the exact
location
+ in the user code that led to the error.
+ """
+ from pyspark.sql.session import SparkSession
+
+ spark = SparkSession._getActiveSessionOrCreate()
+ assert spark._jvm is not None
+
+ stack = inspect.stack()
+ frame_info = stack[-1]
+ filename = frame_info.filename
+ lineno = frame_info.lineno
+ call_site = f"{filename}:{lineno}"
+
+ pyspark_origin =
spark._jvm.org.apache.spark.sql.catalyst.trees.PySparkCurrentOrigin
+ pyspark_origin.set(fragment, call_site)
+
+
+def with_origin(func: Callable[..., Any]) -> Callable[..., Any]:
Review Comment:
Make this private too with `_with_origin` if this isn't supposed to be
referred to in other modules.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]