phaniarnab commented on a change in pull request #1454:
URL: https://github.com/apache/systemds/pull/1454#discussion_r748160371
##########
File path: src/assembly/bin.xml
##########
@@ -91,23 +92,28 @@
<include>*:commons-lang3</include>
<include>*:commons-logging*</include>
<include>*:commons-math3*</include>
+ <include>*:commons-text*</include>
<include>*:guava*</include>
<include>*:hadoop-auth*</include>
<include>*:hadoop-client*</include>
<include>*:hadoop-common*</include>
<include>*:hadoop-hdfs*</include>
<include>*:hadoop-mapreduce-client*</include>
<include>*:hadoop-yarn*</include>
+ <include>*:htrace-core*</include>
<include>*:jackson-core-asl*</include>
<include>*:jackson-mapper-asl*</include>
<include>*:janino*</include>
<include>*:log4j*</include>
<include>*:netty*</include>
<include>*:protobuf-java*</include>
<include>*:py4j*</include>
+ <include>*:re2j*</include>
<include>*:slf4j-api*</include>
<include>*:slf4j-log4j*</include>
<include>*:spark-core*</include>
+ <include>*:stax2-api*</include>
+ <include>*:woodstox*</include>
Review comment:
I see you are adding 6 new libraries to the bin. How much are they going to
increase the size of the binary?
Also, I recommend documenting the reasons why these are needed if you plan
to merge into the release branch.
##########
File path: src/main/python/pre_setup.py
##########
@@ -26,40 +26,56 @@
from zipfile import ZipFile
this_path = os.path.dirname(os.path.realpath(__file__))
-python_dir = 'systemds'
-java_dir = 'systemds-java'
-java_dir_full_path = os.path.join(this_path, python_dir, java_dir)
-if os.path.exists(java_dir_full_path):
- shutil.rmtree(java_dir_full_path, True)
-root_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_path)))
+PYTHON_DIR = 'systemds'
+
+# Go three directories out this is the root dir of systemds repository
+SYSTEMDS_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(this_path)))
# temporary directory for unzipping of bin zip
TMP_DIR = os.path.join(this_path, 'tmp')
if os.path.exists(TMP_DIR):
shutil.rmtree(TMP_DIR, True)
os.mkdir(TMP_DIR)
+
+# Copy jar files from release artifact.
+LIB_DIR = os.path.join(this_path, PYTHON_DIR, 'lib')
+if os.path.exists(LIB_DIR):
+ shutil.rmtree(LIB_DIR, True)
SYSTEMDS_BIN = 'systemds-*-bin.zip'
-for file in os.listdir(os.path.join(root_dir, 'target')):
+for file in os.listdir(os.path.join(SYSTEMDS_ROOT, 'target')):
+ # Take jar files from bin release file
if fnmatch.fnmatch(file, SYSTEMDS_BIN):
- new_path = os.path.join(TMP_DIR, file)
- shutil.copyfile(os.path.join(root_dir, 'target', file), new_path)
+ systemds_bin_zip = os.path.join(SYSTEMDS_ROOT, 'target', file)
extract_dir = os.path.join(TMP_DIR)
- with ZipFile(new_path, 'r') as zip:
+
+ with ZipFile(systemds_bin_zip, 'r') as zip:
for f in zip.namelist():
split_path = os.path.split(os.path.dirname(f))
if split_path[1] == 'lib':
zip.extract(f, TMP_DIR)
unzipped_dir_name = file.rsplit('.', 1)[0]
- shutil.copytree(os.path.join(TMP_DIR, unzipped_dir_name),
java_dir_full_path)
- if os.path.exists(TMP_DIR):
- shutil.rmtree(TMP_DIR, True)
+ shutil.copytree(os.path.join(TMP_DIR, unzipped_dir_name, 'lib'),
LIB_DIR)
+
+# Take hadoop binaries.
+HADOOP_DIR_SRC = os.path.join(SYSTEMDS_ROOT, 'target', 'lib', 'hadoop')
+if os.path.exists(HADOOP_DIR_SRC):
+ shutil.copytree(HADOOP_DIR_SRC, os.path.join(LIB_DIR,"hadoop"))
-root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
-shutil.copyfile(os.path.join(root_dir, 'LICENSE'), 'LICENSE')
-shutil.copyfile(os.path.join(root_dir, 'NOTICE'), 'NOTICE')
+# Take conf files.
+CONF_DIR = os.path.join(this_path, PYTHON_DIR, 'conf')
+if not os.path.exists(CONF_DIR):
+ os.mkdir(CONF_DIR)
+shutil.copy(os.path.join(SYSTEMDS_ROOT,'conf', 'log4j.properties'),
os.path.join(this_path, PYTHON_DIR, 'conf', 'log4j.properties'))
+shutil.copy(os.path.join(SYSTEMDS_ROOT,'conf',
'SystemDS-config-defaults.xml'), os.path.join(this_path, PYTHON_DIR, 'conf',
'SystemDS-config-defaults.xml'))
-# delete old build and dist path
+SYSTEMDS_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
+shutil.copyfile(os.path.join(SYSTEMDS_ROOT, 'LICENSE'), 'LICENSE')
+shutil.copyfile(os.path.join(SYSTEMDS_ROOT, 'NOTICE'), 'NOTICE')
+
+# Remove old build and dist path
+if os.path.exists(TMP_DIR):
+ shutil.rmtree(TMP_DIR, True)
Review comment:
Changes in the release-related files need testing by @j143 and me, as
they can break the release automation.
I would recommend sticking to necessary changes and leaving the improvements
for future releases.
##########
File path: src/main/python/pre_setup.py
##########
@@ -26,40 +26,56 @@
from zipfile import ZipFile
this_path = os.path.dirname(os.path.realpath(__file__))
-python_dir = 'systemds'
-java_dir = 'systemds-java'
-java_dir_full_path = os.path.join(this_path, python_dir, java_dir)
-if os.path.exists(java_dir_full_path):
- shutil.rmtree(java_dir_full_path, True)
-root_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_path)))
+PYTHON_DIR = 'systemds'
+
+# Go three directories out this is the root dir of systemds repository
+SYSTEMDS_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(this_path)))
# temporary directory for unzipping of bin zip
TMP_DIR = os.path.join(this_path, 'tmp')
if os.path.exists(TMP_DIR):
shutil.rmtree(TMP_DIR, True)
os.mkdir(TMP_DIR)
+
+# Copy jar files from release artifact.
+LIB_DIR = os.path.join(this_path, PYTHON_DIR, 'lib')
+if os.path.exists(LIB_DIR):
+ shutil.rmtree(LIB_DIR, True)
SYSTEMDS_BIN = 'systemds-*-bin.zip'
-for file in os.listdir(os.path.join(root_dir, 'target')):
+for file in os.listdir(os.path.join(SYSTEMDS_ROOT, 'target')):
+ # Take jar files from bin release file
if fnmatch.fnmatch(file, SYSTEMDS_BIN):
- new_path = os.path.join(TMP_DIR, file)
- shutil.copyfile(os.path.join(root_dir, 'target', file), new_path)
+ systemds_bin_zip = os.path.join(SYSTEMDS_ROOT, 'target', file)
extract_dir = os.path.join(TMP_DIR)
- with ZipFile(new_path, 'r') as zip:
+
+ with ZipFile(systemds_bin_zip, 'r') as zip:
for f in zip.namelist():
split_path = os.path.split(os.path.dirname(f))
if split_path[1] == 'lib':
zip.extract(f, TMP_DIR)
unzipped_dir_name = file.rsplit('.', 1)[0]
- shutil.copytree(os.path.join(TMP_DIR, unzipped_dir_name),
java_dir_full_path)
- if os.path.exists(TMP_DIR):
- shutil.rmtree(TMP_DIR, True)
+ shutil.copytree(os.path.join(TMP_DIR, unzipped_dir_name, 'lib'),
LIB_DIR)
+
+# Take hadoop binaries.
+HADOOP_DIR_SRC = os.path.join(SYSTEMDS_ROOT, 'target', 'lib', 'hadoop')
+if os.path.exists(HADOOP_DIR_SRC):
+ shutil.copytree(HADOOP_DIR_SRC, os.path.join(LIB_DIR,"hadoop"))
Review comment:
Please document why we need the hadoop and winutils dll/libs.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]