asfgit closed pull request #23441: [SPARK-26349][PYSPARK] Forbid insecure py4j gateways
URL: https://github.com/apache/spark/pull/23441
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), GitHub will not display
its diff once it has been merged, so the diff is supplied below:

diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 6137ed25a0dd9..64178eb7b58a9 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -115,6 +115,11 @@ def __init__(self, master=None, appName=None, sparkHome=None, pyFiles=None,
         ValueError:...
         """
         self._callsite = first_spark_call() or CallSite(None, None, None)
+        if gateway is not None and gateway.gateway_parameters.auth_token is None:
+            raise ValueError(
+                "You are trying to pass an insecure Py4j gateway to Spark. This"
+                " is not allowed as it is a security risk.")
+
         SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
         try:
             self._do_init(master, appName, sparkHome, pyFiles, environment, batchSize, serializer,
diff --git a/python/pyspark/tests/test_context.py b/python/pyspark/tests/test_context.py
index 201baf420354d..18d9cd40be9f0 100644
--- a/python/pyspark/tests/test_context.py
+++ b/python/pyspark/tests/test_context.py
@@ -20,6 +20,7 @@
 import threading
 import time
 import unittest
+from collections import namedtuple
 
 from pyspark import SparkFiles, SparkContext
 from pyspark.testing.utils import ReusedPySparkTestCase, PySparkTestCase, QuietTest, SPARK_HOME
@@ -246,6 +247,15 @@ def test_startTime(self):
         with SparkContext() as sc:
             self.assertGreater(sc.startTime, 0)
 
+    def test_forbid_insecure_gateway(self):
+        # Fail immediately if you try to create a SparkContext
+        # with an insecure gateway
+        parameters = namedtuple('MockGatewayParameters', 'auth_token')(None)
+        mock_insecure_gateway = namedtuple('MockJavaGateway', 'gateway_parameters')(parameters)
+        with self.assertRaises(ValueError) as context:
+            SparkContext(gateway=mock_insecure_gateway)
+        self.assertIn("insecure Py4j gateway", str(context.exception))
+
 
 if __name__ == "__main__":
     from pyspark.tests.test_context import *


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to