This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new ee3a612b28c1 [SPARK-48705][PYTHON] Explicitly use worker_main when it
starts with pyspark
ee3a612b28c1 is described below
commit ee3a612b28c185e7f4d558c09efbf93ebe46be2b
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Wed Jun 26 14:42:06 2024 +0900
[SPARK-48705][PYTHON] Explicitly use worker_main when it starts with pyspark
### What changes were proposed in this pull request?
This PR proposes to import the worker module from the first argument only when the module name starts with `pyspark`.
### Why are the changes needed?
Because the daemon now imports the worker module from the first argument, it could
break users who run custom workers that pass extra parameters. These arguments are not
an actual API, but it is better to be stricter about which argument is treated as a module name.
Here is an example of sentry-python
```
ModuleNotFoundError Traceback (most recent call last)
File <command-558609978944307>, line 4
1 import sentry_sdk
2 from sentry_sdk.integrations.spark import SparkWorkerIntegration
----> 4 sentry_sdk.init(
5 dsn="https://examplePublicKeyo0.ingest.sentry.io/0",
6 traces_sample_rate=1.0,
7 profiles_sample_rate=1.0,
8 integrations=[
9 SparkWorkerIntegration(),
10 ],
11 )
File /.../python3.10/site-packages/sentry_sdk/hub.py:121, in _init(*args,
**kwargs)
115 def _init(*args, **kwargs):
116 # type: (*Optional[str], **Any) -> ContextManager[Any]
117 """Initializes the SDK and optionally integrations.
118
119 This takes the same arguments as the client constructor.
120 """
--> 121 client = Client(*args, **kwargs) # type: ignore
122 Hub.current.bind_client(client)
123 _check_python_deprecations()
File /.../python3.10/site-packages/sentry_sdk/client.py:128, in
_Client.__init__(self, *args, **kwargs)
124 def __init__(self, *args, **kwargs):
125 # type: (*Any, **Any) -> None
126 self.options = get_options(*args, **kwargs) # type: Dict[str,
Any]
--> 128 self._init_impl()
File /.../python3.10/site-packages/sentry_sdk/client.py:162, in
_Client._init_impl(self)
155 if self.options["request_bodies"] not in request_bodies:
156 raise ValueError(
157 "Invalid value for request_bodies. Must be one of
{}".format(
158 request_bodies
159 )
160 )
--> 162 self.integrations = setup_integrations(
163 self.options["integrations"],
164 with_defaults=self.options["default_integrations"],
165 with_auto_enabling_integrations=self.options[
166 "auto_enabling_integrations"
167 ],
168 )
170 sdk_name = get_sdk_name(list(self.integrations.keys()))
171 SDK_INFO["name"] = sdk_name
File /.../python3.10/site-packages/sentry_sdk/integrations/__init__.py:124,
in setup_integrations(integrations, with_defaults,
with_auto_enabling_integrations)
120 logger.debug(
121 "Setting up previously not enabled integration %s", identifier
122 )
123 try:
--> 124 type(integration).setup_once()
125 except NotImplementedError:
126 if getattr(integration, "install", None) is not None:
File
/.../python3.10/site-packages/sentry_sdk/integrations/spark/spark_worker.py:31,
in SparkWorkerIntegration.setup_once()
28 @staticmethod
29 def setup_once():
30 # type: () -> None
---> 31 import pyspark.daemon as original_daemon
33 original_daemon.worker_main = _sentry_worker_main
File /.../python/pyspark/daemon.py:40
37 if len(sys.argv) > 1:
38 import importlib
---> 40 worker_module = importlib.import_module(sys.argv[1])
41 worker_main = worker_module.main
42 else:
File /usr/lib/python3.10/importlib/__init__.py:126, in import_module(name,
package)
124 break
125 level += 1
--> 126 return _bootstrap._gcd_import(name[level:], package, level)
```
### Does this PR introduce _any_ user-facing change?
Yes, it recovers the support of
[sentry-python](https://github.com/getsentry/sentry-python).
### How was this patch tested?
Manually tested.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47077 from HyukjinKwon/SPARK-48705.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/daemon.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/python/pyspark/daemon.py b/python/pyspark/daemon.py
index bbbc495d053e..a23af109ea6d 100644
--- a/python/pyspark/daemon.py
+++ b/python/pyspark/daemon.py
@@ -30,7 +30,7 @@ from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL,
SIG_IGN, SIGINT
from pyspark.serializers import read_int, write_int, write_with_length,
UTF8Deserializer
-if len(sys.argv) > 1:
+if len(sys.argv) > 1 and sys.argv[1].startswith("pyspark"):
import importlib
worker_module = importlib.import_module(sys.argv[1])
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]