This is an automated email from the ASF dual-hosted git repository. mykolabodnar pushed a commit to branch DATALAB-2372 in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit e6d921eb3b81df1b56394cb043841617af75cd20 Author: bodnarmykola <[email protected]> AuthorDate: Tue Jun 22 13:31:23 2021 +0300 [DATALAB-2372] - [GCP] Deeplearning deploy from cloud image implemented --- .../src/base/scripts/install_user_key.py | 8 ++++-- .../scripts/configure_deep_learning_node.py | 32 ++++++++++++++++++++-- .../files/gcp/deeplearning_description.json | 2 +- .../src/general/lib/gcp/meta_lib.py | 17 ++++++++++++ .../general/scripts/gcp/common_prepare_notebook.py | 20 +++++++++++--- 5 files changed, 69 insertions(+), 10 deletions(-) diff --git a/infrastructure-provisioning/src/base/scripts/install_user_key.py b/infrastructure-provisioning/src/base/scripts/install_user_key.py index 3d417ab..d7a5faf 100644 --- a/infrastructure-provisioning/src/base/scripts/install_user_key.py +++ b/infrastructure-provisioning/src/base/scripts/install_user_key.py @@ -66,9 +66,11 @@ if __name__ == "__main__": except: print('Fail connection') sys.exit(2) - - print("Ensuring safest ssh ciphers") - ensure_ciphers() + try: + print("Ensuring safest ssh ciphers") + ensure_ciphers() + except: + print('Faild to install safest ssh ciphers') print("Installing users key...") try: diff --git a/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py b/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py index 00600e5..54f8601 100644 --- a/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py +++ b/infrastructure-provisioning/src/deeplearning/scripts/configure_deep_learning_node.py @@ -90,6 +90,22 @@ def install_itorch(os_user): conn.sudo('chown -R {0}:{0} /home/{0}/.local/share/jupyter/'.format(os_user)) conn.sudo('touch /home/{}/.ensure_dir/itorch_ensured'.format(os_user)) +def configure_jupyterlab_at_gcp_image(os_user, exploratory_name): + if not exists(conn, '/home/{}/.ensure_dir/jupyterlab_ensured'.format(os_user)): + jupyter_conf_file = '/home/jupyter/.jupyter/jupyter_notebook_config.py' + conn.sudo('''bash -l -c 'sed -i "s|c.NotebookApp|#c.NotebookApp|g" {}' '''.format(jupyter_conf_file)) + conn.sudo('''bash -l -c "echo 'c.NotebookApp.ip = \\"0.0.0.0\\" ' >> {}" '''.format(jupyter_conf_file)) + conn.sudo('''bash -l -c "echo 'c.NotebookApp.port = 8888' >> {}" '''.format(jupyter_conf_file)) + conn.sudo('''bash -l -c "echo 'c.NotebookApp.base_url = \\"/{0}/\\"' >> {1}" '''.format(exploratory_name, + jupyter_conf_file)) + conn.sudo('''bash -l -c "echo 'c.NotebookApp.open_browser = False' >> {}" '''.format(jupyter_conf_file)) + conn.sudo('''bash -l -c "echo 'c.NotebookApp.allow_remote_access = True' >> {}" '''.format(jupyter_conf_file)) + conn.sudo('''bash -l -c "echo 'c.NotebookApp.cookie_secret = b\\"{0}\\"' >> {1}" '''.format(id_generator(), + jupyter_conf_file)) + conn.sudo('''bash -l -c "echo \\"c.NotebookApp.token = u''\\" >> {}" '''.format(jupyter_conf_file)) + conn.sudo('systemctl restart jupyter') + conn.sudo('touch /home/{}/.ensure_dir/jupyterlab_ensured'.format(os_user)) + if __name__ == "__main__": print("Configure connections") @@ -105,7 +121,16 @@ if __name__ == "__main__": except: sys.exit(1) print("Mount additional volume") - prepare_disk(args.os_user) + if os.environ['conf_cloud_provider'] == 'gcp' and os.environ['conf_deeplearning_cloud_ami'] == 'true': + print('Additional disk premounted by google image') + print('Installing nvidia drivers') + try: + conn.sudo('/opt/deeplearning/install-driver.sh') + except: + traceback.print_exc() + sys.exit(1) + else: + prepare_disk(args.os_user) if os.environ['conf_deeplearning_cloud_ami'] == 'false': # INSTALL LANGUAGES @@ -157,10 +182,13 @@ if __name__ == "__main__": ensure_additional_python_libs(args.os_user) print("Install Matplotlib") ensure_matplot(args.os_user) - elif os.environ['conf_deeplearning_cloud_ami'] == 'true': + elif os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['conf_cloud_provider'] != 'gcp': # CONFIGURE JUPYTER NOTEBOOK print("Configure Jupyter") configure_jupyter(args.os_user, jupyter_conf_file, templates_dir, args.jupyter_version, args.exploratory_name) + else: + configure_jupyterlab_at_gcp_image(args.os_user, args.exploratory_name) + # INSTALL UNGIT print("Install nodejs") diff --git a/infrastructure-provisioning/src/general/files/gcp/deeplearning_description.json b/infrastructure-provisioning/src/general/files/gcp/deeplearning_description.json index 79ed687..f3dd752 100644 --- a/infrastructure-provisioning/src/general/files/gcp/deeplearning_description.json +++ b/infrastructure-provisioning/src/general/files/gcp/deeplearning_description.json @@ -20,7 +20,7 @@ "exploratory_environment_images" : [ {"Image family": "common-cu110", "Description": "Google Deep Learning Image: Base, m67 CUDA11.0, A debian-10 Linux based image with CUDA 11.0 preinstalled."}, - {"Image family": "ccommon-cu100", "Description": "Google Deep Learning Image: Base, m67 CUDA10.0. A debian-10 Linux based image with CUDA 10.0 preinstalled."}, + {"Image family": "common-cu100", "Description": "Google Deep Learning Image: Base, m67 CUDA10.0. A debian-10 Linux based image with CUDA 10.0 preinstalled."}, {"Image family": "common-cu92", "Description": "Google Deep Learning Image: Base, m67 CUDA 9.2, A Debian based image with CUDA 9.2 pre-installed."}, {"Image family": "pytorch-latest-gpu", "Description": "Google Deep Learning Image: PyTorch 1.8, m67 CUDA 110, A debian-10 Linux based image with PyTorch 1.8 pre-installed."}, {"Image family": "rapids-latest-gpu-experimental", "Description": "Google RAPIDS 0.5.1 with XGBoost, m64, RAPIDS 0.5.1 with XGBoost with CUDA 10.0."}, diff --git a/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py b/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py index 583ec5f..be5d17b 100644 --- a/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py +++ b/infrastructure-provisioning/src/general/lib/gcp/meta_lib.py @@ -394,6 +394,23 @@ class GCPMeta: "error_message": str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)})) traceback.print_exc(file=sys.stdout) + def get_deeplearning_image_by_family(self, family_name): + try: + request = self.service.images().getFromFamily(project='deeplearning-platform-release', family=family_name) + try: + return request.execute() + except errors.HttpError as err: + if err.resp.status == 404: + return '' + else: + raise err + except Exception as err: + logging.info("Error with getting image by family: " + str(err) + "\n Traceback: " + traceback.print_exc( + file=sys.stdout)) + append_result(str({"error": "Error with getting image by family", + "error_message": str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)})) + traceback.print_exc(file=sys.stdout) + def get_disk(self, disk_name): try: request = self.service.disks().get(project=self.project, zone=os.environ['gcp_zone'], disk=disk_name) diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py b/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py index 2cb3f64..1c3c038 100644 --- a/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py +++ b/infrastructure-provisioning/src/general/scripts/gcp/common_prepare_notebook.py @@ -119,19 +119,31 @@ if __name__ == "__main__": os.environ['application'], os.environ['notebook_image_name'].replace('_', '-').lower()) if (x != 'None' and x != '') else notebook_config['expected_primary_image_name'])(str(os.environ.get('notebook_image_name'))) print('Searching pre-configured images') - notebook_config['primary_image_name'] = GCPMeta.get_image_by_name( - notebook_config['notebook_primary_image_name']) + + if os.environ['conf_deeplearning_cloud_ami'] == 'true' and os.environ['application'] == 'deeplearning': + notebook_config['primary_image_name'] = GCPMeta.get_deeplearning_image_by_family(os.environ['notebook_image_name']) + if notebook_config['primary_image_name']: + deeplearning_ami = 'true' + else: + notebook_config['primary_image_name'] = GCPMeta.get_image_by_name(notebook_config['notebook_primary_image_name']) + deeplearning_ami = 'false' if notebook_config['primary_image_name'] == '': notebook_config['primary_image_name'] = os.environ['gcp_{}_image_name'.format(os.environ['conf_os_family'])] else: print('Pre-configured primary image found. Using: {}'.format( notebook_config['primary_image_name'].get('name'))) - notebook_config['primary_image_name'] = 'global/images/{}'.format( + if deeplearning_ami == 'true': + notebook_config['primary_image_name'] = 'projects/deeplearning-platform-release/global/images/{}'.format( + notebook_config['primary_image_name'].get('name')) + else: + notebook_config['primary_image_name'] = 'global/images/{}'.format( notebook_config['primary_image_name'].get('name')) notebook_config['notebook_secondary_image_name'] = (lambda x: '{0}-{1}-{2}-{3}-secondary-image-{4}'.format( notebook_config['service_base_name'], notebook_config['project_name'], notebook_config['endpoint_name'], - os.environ['application'], os.environ['notebook_image_name'].replace('_', '-').lower()) if (x != 'None' and x != '') + os.environ['application'], os.environ['notebook_image_name'].replace('_', '-').lower()[:63]) if (x != 'None' and x != '') else notebook_config['expected_secondary_image_name'])(str(os.environ.get('notebook_image_name'))) + if notebook_config['notebook_secondary_image_name'][:63].endswith('-'): + notebook_config['notebook_secondary_image_name'] = notebook_config['notebook_secondary_image_name'][:63][:-1] notebook_config['secondary_image_name'] = GCPMeta.get_image_by_name( notebook_config['notebook_secondary_image_name']) if notebook_config['secondary_image_name'] == '': --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
