This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-2691
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
The following commit(s) were added to refs/heads/DATALAB-2691 by this push:
new 7c3a7d4 [DATALAB-2691]: fixed python libs install for gcp dataproc on zeppelin
7c3a7d4 is described below
commit 7c3a7d4133b335bd2b7a6ca5963823345eb0d4d6
Author: leonidfrolov <[email protected]>
AuthorDate: Wed Mar 16 13:57:39 2022 +0200
[DATALAB-2691]: fixed python libs install for gcp dataproc on zeppelin
---
.../src/general/lib/gcp/actions_lib.py | 12 +++++-------
.../gcp/zeppelin_dataengine-service_create_configs.py | 11 ++++++++---
.../gcp/zeppelin_install_dataengine-service_kernels.py | 12 +++++++-----
3 files changed, 20 insertions(+), 15 deletions(-)
diff --git a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
index 6bfc177..b0733b8 100644
--- a/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/gcp/actions_lib.py
@@ -1340,7 +1340,7 @@ class GCPActions:
except:
sys.exit(1)
- def install_python(self, bucket, user_name, cluster_name, application,
numpy_version='1.14.3'):
+ def install_python(self, bucket, user_name, cluster_name, application,
numpy_version):
try:
GCPActions().get_cluster_app_version(bucket, user_name,
cluster_name, 'python')
with open('/tmp/python_version') as f:
@@ -1355,11 +1355,9 @@ class GCPActions:
subprocess.run('sudo -i virtualenv
/opt/python/python{}'.format(python_version), shell=True, check=True)
venv_command = 'source
/opt/python/python{}/bin/activate'.format(python_version)
pip_command =
'/opt/python/python{0}/bin/pip{1}'.format(python_version, python_version[:3])
- subprocess.run('bash -c "{0} && sudo -i {1} install -U
pip==9.0.3"'.format(venv_command, pip_command), shell=True, check=True)
- subprocess.run('bash -c "{0} && sudo -i {1} install
pyzmq==17.0.0"'.format(venv_command, pip_command), shell=True, check=True)
- for lib in ['ipython', 'ipykernel',
'NumPy=={}'.format(numpy_version), 'boto', 'boto3', 'pybind11',
- 'pythran', 'cython', 'SciPy', 'Matplotlib',
'pandas', 'Sympy', 'Pillow', 'sklearn']:
- subprocess.run('bash -c "{0} && sudo -i {1} install {2}
--no-cache-dir"'
+ for lib in ['-U pip==9.0.3', 'pyzmq==17.0.0', 'ipython
ipykernel boto boto3 pybind11 pythran cython NumPy=={} Matplotlib
--no-cache-dir'.format(numpy_version),
+ 'SciPy pandas Sympy Pillow --no-cache-dir',
'sklearn --no-cache-dir']:
+ subprocess.run('bash -c "{0} && sudo -i {1} install {2}"'
.format(venv_command, pip_command, lib),
shell=True, check=True)
if application == 'deeplearning':
subprocess.run('bash -c "{0} && sudo -i {1} install
mxnet-cu80 opencv-python keras Theano --no-cache-dir"'.format(venv_command,
pip_command), shell=True, check=True)
@@ -1410,7 +1408,7 @@ def get_cluster_python_version(region, bucket, user_name,
cluster_name):
def installing_python(region, bucket, user_name, cluster_name, application='',
pip_mirror='', numpy_version='1.14.3'):
try:
- GCPActions().install_python(bucket, user_name, cluster_name,
application)
+ GCPActions().install_python(bucket, user_name, cluster_name,
application, numpy_version)
except:
sys.exit(1)
diff --git
a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_dataengine-service_create_configs.py
b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_dataengine-service_create_configs.py
index ea33688..380015c 100644
---
a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_dataengine-service_create_configs.py
+++
b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_dataengine-service_create_configs.py
@@ -36,6 +36,7 @@ parser.add_argument('--dry_run', type=str, default='false')
parser.add_argument('--dataproc_version', type=str, default='')
parser.add_argument('--spark_version', type=str, default='')
parser.add_argument('--hadoop_version', type=str, default='')
+parser.add_argument('--numpy_version', type=str, default='')
parser.add_argument('--region', type=str, default='')
parser.add_argument('--user_name', type=str, default='')
parser.add_argument('--os_user', type=str, default='')
@@ -80,7 +81,11 @@ if __name__ == "__main__":
configuring_notebook(args.dataproc_version)
if args.multiple_clusters == 'true':
install_remote_livy(args)
- installing_python(args.region, args.bucket, args.user_name,
args.cluster_name, args.application, args.pip_mirror)
-
datalab.actions_lib.GCPActions().configure_zeppelin_dataproc_interpreter(args.dataproc_version,
args.cluster_name, spark_dir, args.os_user,
-
yarn_dir, args.bucket, args.user_name, args.multiple_clusters)
+ installing_python(args.region, args.bucket, args.user_name,
args.cluster_name, args.application,
+ args.pip_mirror, args.numpy_version)
+
datalab.actions_lib.GCPActions().configure_zeppelin_dataproc_interpreter(args.dataproc_version,
+
args.cluster_name, spark_dir,
+
args.os_user, yarn_dir,
+
args.bucket, args.user_name,
+
args.multiple_clusters)
update_zeppelin_interpreters(args.multiple_clusters, args.r_enabled)
diff --git
a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
index 22936d4..7893d64 100644
---
a/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
+++
b/infrastructure-provisioning/src/general/scripts/gcp/zeppelin_install_dataengine-service_kernels.py
@@ -57,7 +57,8 @@ def configure_notebook(args):
conn.sudo('chmod 755 /usr/local/bin/create_configs.py')
conn.sudo('mkdir -p /usr/lib/python3.8/datalab/')
conn.run('mkdir -p /home/{}/datalab_libs/'.format(args.os_user))
- conn.local('rsync -e "ssh -i {0}" /usr/lib/python3.8/datalab/*.py
{1}@{2}:/home/{1}/datalab_libs/'.format(args.keyfile, args.os_user,
args.notebook_ip))
+ conn.local('rsync -e "ssh -i {0}" /usr/lib/python3.8/datalab/*.py
{1}@{2}:/home/{1}/datalab_libs/'
+ .format(args.keyfile, args.os_user, args.notebook_ip))
conn.run('chmod a+x /home/{}/datalab_libs/*'.format(args.os_user))
conn.sudo('mv /home/{}/datalab_libs/*
/usr/lib/python3.8/datalab/'.format(args.os_user))
conn.sudo('rm -rf /home/{}/datalab_libs/'.format(args.os_user))
@@ -75,7 +76,8 @@ if __name__ == "__main__":
hadoop_version =
datalab.actions_lib.GCPActions().get_cluster_app_version(args.bucket,
args.project_name, args.cluster_name, 'hadoop')
conn.sudo('''bash -l -c 'echo "[global]" > /etc/pip.conf; echo "proxy =
$(cat /etc/profile | grep proxy | head -n1 | cut -f2 -d=)" >> /etc/pip.conf'
''')
conn.sudo('''bash -l -c 'echo "use_proxy=yes" > ~/.wgetrc; proxy=$(cat
/etc/profile | grep proxy | head -n1 | cut -f2 -d=); echo "http_proxy=$proxy"
>> ~/.wgetrc; echo "https_proxy=$proxy" >> ~/.wgetrc' ''')
- conn.sudo('''bash -l -c 'unset http_proxy https_proxy; export
gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python3
/usr/local/bin/create_configs.py --bucket {2} --cluster_name {3}
--dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7}
--user_name {8} --os_user {9} --application {10} --livy_version {11}
--multiple_clusters {12} --r_enabled {13}' '''
- .format(os.environ['gcp_project_id'], os.environ['conf_resource'],
args.bucket, args.cluster_name, args.dataproc_version,
- spark_version, hadoop_version, args.region, args.project_name,
args.os_user, args.application,
- os.environ['notebook_livy_version'],
os.environ['notebook_multiple_clusters'], r_enabled))
\ No newline at end of file
+ conn.sudo('''bash -l -c 'unset http_proxy https_proxy; export
gcp_project_id="{0}"; export conf_resource="{1}"; /usr/bin/python3
/usr/local/bin/create_configs.py --bucket {2} --cluster_name {3}
--dataproc_version {4} --spark_version {5} --hadoop_version {6} --region {7}
--user_name {8} --os_user {9} --application {10} --livy_version {11}
--multiple_clusters {12} --r_enabled {13} --numpy_version {14}' '''
+ .format(os.environ['gcp_project_id'], os.environ['conf_resource'],
args.bucket, args.cluster_name,
+ args.dataproc_version, spark_version, hadoop_version,
args.region, args.project_name, args.os_user,
+ args.application, os.environ['notebook_livy_version'],
os.environ['notebook_multiple_clusters'],
+ r_enabled, os.environ['notebook_numpy_version']))
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]