Github user vanzin commented on a diff in the pull request:
https://github.com/apache/spark/pull/21198#discussion_r185242337
--- Diff: python/run-tests.py ---
@@ -77,13 +79,33 @@ def run_individual_python_test(test_name, pyspark_python):
'PYSPARK_PYTHON': which(pyspark_python),
'PYSPARK_DRIVER_PYTHON': which(pyspark_python)
})
+
+    # Create a unique temp directory under 'target/' for each run. The TMPDIR variable is
+    # recognized by the tempfile module to override the default system temp directory.
+    target_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), 'target'))
+ if not os.path.isdir(target_dir):
+ os.mkdir(target_dir)
+ tmp_dir = os.path.join(target_dir, str(uuid.uuid4()))
+ if not os.path.isdir(tmp_dir):
+ os.mkdir(tmp_dir)
+ env["TMPDIR"] = tmp_dir
+
+    # Also override the JVM's temp directory by setting driver and executor options.
+    spark_args = [
+        "--conf",
+        "spark.driver.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir),
+        "--conf",
+        "spark.executor.extraJavaOptions=-Djava.io.tmpdir={0}".format(tmp_dir),
+        "pyspark-shell"
+    ]
+ env["PYSPARK_SUBMIT_ARGS"] = " ".join(spark_args)
+
LOGGER.info("Starting test(%s): %s", pyspark_python, test_name)
start_time = time.time()
try:
per_test_output = tempfile.TemporaryFile()
retcode = subprocess.Popen(
[os.path.join(SPARK_HOME, "bin/pyspark"), test_name],
stderr=per_test_output, stdout=per_test_output, env=env).wait()
+ shutil.rmtree(tmp_dir, ignore_errors=True)
--- End diff --
I wanted to leave the failed temp directories behind in case they might contain useful info.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]