phaniarnab commented on a change in pull request #1454:
URL: https://github.com/apache/systemds/pull/1454#discussion_r748160371



##########
File path: src/assembly/bin.xml
##########
@@ -91,23 +92,28 @@
                                <include>*:commons-lang3</include>
                                <include>*:commons-logging*</include>
                                <include>*:commons-math3*</include>
+                               <include>*:commons-text*</include>
                                <include>*:guava*</include>
                                <include>*:hadoop-auth*</include>
                                <include>*:hadoop-client*</include>
                                <include>*:hadoop-common*</include>
                                <include>*:hadoop-hdfs*</include>
                                <include>*:hadoop-mapreduce-client*</include>
                                <include>*:hadoop-yarn*</include>
+                               <include>*:htrace-core*</include>
                                <include>*:jackson-core-asl*</include>
                                <include>*:jackson-mapper-asl*</include>
                                <include>*:janino*</include>
                                <include>*:log4j*</include>
                                <include>*:netty*</include>
                                <include>*:protobuf-java*</include>
                                <include>*:py4j*</include>
+                               <include>*:re2j*</include>
                                <include>*:slf4j-api*</include>
                                <include>*:slf4j-log4j*</include>
                                <include>*:spark-core*</include>
+                               <include>*:stax2-api*</include>
+                               <include>*:woodstox*</include>

Review comment:
       I see you are adding 6 new libraries to the bin. How much are they going 
to increase the size of the binary?
   Also, I recommend documenting the reasons why these are needed if you plan 
to merge into the release branch.

##########
File path: src/main/python/pre_setup.py
##########
@@ -26,40 +26,56 @@
 from zipfile import ZipFile
 
 this_path = os.path.dirname(os.path.realpath(__file__))
-python_dir = 'systemds'
-java_dir = 'systemds-java'
-java_dir_full_path = os.path.join(this_path, python_dir, java_dir)
-if os.path.exists(java_dir_full_path):
-    shutil.rmtree(java_dir_full_path, True)
-root_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_path)))
+PYTHON_DIR = 'systemds'
+
+# Go three directories out this is the root dir of systemds repository
+SYSTEMDS_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(this_path))) 
 
 # temporary directory for unzipping of bin zip
 TMP_DIR = os.path.join(this_path, 'tmp')
 if os.path.exists(TMP_DIR):
     shutil.rmtree(TMP_DIR, True)
 os.mkdir(TMP_DIR)
 
+
+# Copy jar files from release artifact.
+LIB_DIR = os.path.join(this_path, PYTHON_DIR, 'lib')
+if os.path.exists(LIB_DIR):
+    shutil.rmtree(LIB_DIR, True)
 SYSTEMDS_BIN = 'systemds-*-bin.zip'
-for file in os.listdir(os.path.join(root_dir, 'target')):
+for file in os.listdir(os.path.join(SYSTEMDS_ROOT, 'target')):
+    # Take jar files from bin release file
     if fnmatch.fnmatch(file, SYSTEMDS_BIN):
-        new_path = os.path.join(TMP_DIR, file)
-        shutil.copyfile(os.path.join(root_dir, 'target', file), new_path)
+        systemds_bin_zip = os.path.join(SYSTEMDS_ROOT, 'target', file)
         extract_dir = os.path.join(TMP_DIR)
-        with ZipFile(new_path, 'r') as zip:
+
+        with ZipFile(systemds_bin_zip, 'r') as zip:
             for f in zip.namelist():
                 split_path = os.path.split(os.path.dirname(f))
                 if split_path[1] == 'lib':
                     zip.extract(f, TMP_DIR)
         unzipped_dir_name = file.rsplit('.', 1)[0]
-        shutil.copytree(os.path.join(TMP_DIR, unzipped_dir_name), 
java_dir_full_path)
-        if os.path.exists(TMP_DIR):
-            shutil.rmtree(TMP_DIR, True)
+        shutil.copytree(os.path.join(TMP_DIR, unzipped_dir_name, 'lib'), 
LIB_DIR)
+
+# Take hadoop binaries.
+HADOOP_DIR_SRC = os.path.join(SYSTEMDS_ROOT, 'target', 'lib', 'hadoop')
+if os.path.exists(HADOOP_DIR_SRC):
+    shutil.copytree(HADOOP_DIR_SRC, os.path.join(LIB_DIR,"hadoop"))
 
-root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
-shutil.copyfile(os.path.join(root_dir, 'LICENSE'), 'LICENSE')
-shutil.copyfile(os.path.join(root_dir, 'NOTICE'), 'NOTICE')
+# Take conf files.
+CONF_DIR = os.path.join(this_path, PYTHON_DIR, 'conf')
+if not os.path.exists(CONF_DIR):
+    os.mkdir(CONF_DIR)
+shutil.copy(os.path.join(SYSTEMDS_ROOT,'conf', 'log4j.properties'), 
os.path.join(this_path, PYTHON_DIR, 'conf', 'log4j.properties'))
+shutil.copy(os.path.join(SYSTEMDS_ROOT,'conf', 
'SystemDS-config-defaults.xml'), os.path.join(this_path, PYTHON_DIR, 'conf', 
'SystemDS-config-defaults.xml'))
 
-# delete old build and dist path
+SYSTEMDS_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd())))
+shutil.copyfile(os.path.join(SYSTEMDS_ROOT, 'LICENSE'), 'LICENSE')
+shutil.copyfile(os.path.join(SYSTEMDS_ROOT, 'NOTICE'), 'NOTICE')
+
+# Remove old build and dist path
+if os.path.exists(TMP_DIR):
+    shutil.rmtree(TMP_DIR, True)

Review comment:
       Changes in the release-related files need testing by @j143 and myself, 
as they can break the release automation.
   I would recommend sticking to necessary changes and leaving the improvements 
for future releases.

##########
File path: src/main/python/pre_setup.py
##########
@@ -26,40 +26,56 @@
 from zipfile import ZipFile
 
 this_path = os.path.dirname(os.path.realpath(__file__))
-python_dir = 'systemds'
-java_dir = 'systemds-java'
-java_dir_full_path = os.path.join(this_path, python_dir, java_dir)
-if os.path.exists(java_dir_full_path):
-    shutil.rmtree(java_dir_full_path, True)
-root_dir = os.path.dirname(os.path.dirname(os.path.dirname(this_path)))
+PYTHON_DIR = 'systemds'
+
+# Go three directories out this is the root dir of systemds repository
+SYSTEMDS_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(this_path))) 
 
 # temporary directory for unzipping of bin zip
 TMP_DIR = os.path.join(this_path, 'tmp')
 if os.path.exists(TMP_DIR):
     shutil.rmtree(TMP_DIR, True)
 os.mkdir(TMP_DIR)
 
+
+# Copy jar files from release artifact.
+LIB_DIR = os.path.join(this_path, PYTHON_DIR, 'lib')
+if os.path.exists(LIB_DIR):
+    shutil.rmtree(LIB_DIR, True)
 SYSTEMDS_BIN = 'systemds-*-bin.zip'
-for file in os.listdir(os.path.join(root_dir, 'target')):
+for file in os.listdir(os.path.join(SYSTEMDS_ROOT, 'target')):
+    # Take jar files from bin release file
     if fnmatch.fnmatch(file, SYSTEMDS_BIN):
-        new_path = os.path.join(TMP_DIR, file)
-        shutil.copyfile(os.path.join(root_dir, 'target', file), new_path)
+        systemds_bin_zip = os.path.join(SYSTEMDS_ROOT, 'target', file)
         extract_dir = os.path.join(TMP_DIR)
-        with ZipFile(new_path, 'r') as zip:
+
+        with ZipFile(systemds_bin_zip, 'r') as zip:
             for f in zip.namelist():
                 split_path = os.path.split(os.path.dirname(f))
                 if split_path[1] == 'lib':
                     zip.extract(f, TMP_DIR)
         unzipped_dir_name = file.rsplit('.', 1)[0]
-        shutil.copytree(os.path.join(TMP_DIR, unzipped_dir_name), 
java_dir_full_path)
-        if os.path.exists(TMP_DIR):
-            shutil.rmtree(TMP_DIR, True)
+        shutil.copytree(os.path.join(TMP_DIR, unzipped_dir_name, 'lib'), 
LIB_DIR)
+
+# Take hadoop binaries.
+HADOOP_DIR_SRC = os.path.join(SYSTEMDS_ROOT, 'target', 'lib', 'hadoop')
+if os.path.exists(HADOOP_DIR_SRC):
+    shutil.copytree(HADOOP_DIR_SRC, os.path.join(LIB_DIR,"hadoop"))

Review comment:
       Please document why we need the hadoop and winutils dll/libs.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to