This is an automated email from the ASF dual-hosted git repository. mykolabodnar pushed a commit to branch DLAB-2011 in repository https://gitbox.apache.org/repos/asf/incubator-dlab.git
commit b745d215a73754073215556f0db5c639d4e26ee9 Author: bodnarmykola <bodnarmyk...@gmail.com> AuthorDate: Thu Aug 27 15:15:10 2020 +0300 [DLAB-2011] - Support SparkMagic/Livy connection for Deeplearning/Jupyter with TensorFlow implemented --- .../src/general/files/aws/tensor_Dockerfile | 1 + .../general/files/azure/deeplearning_Dockerfile | 1 + .../src/general/files/azure/tensor_Dockerfile | 1 + .../src/general/files/gcp/deeplearning_Dockerfile | 1 + .../src/general/files/gcp/tensor_Dockerfile | 1 + .../os/deeplearning_dataengine_create_configs.py | 45 ++++++++++++++++++---- .../os/deeplearning_install_dataengine_kernels.py | 9 +++-- .../scripts/os/tensor_dataengine_create_configs.py | 45 ++++++++++++++++++---- .../os/tensor_install_dataengine_kernels.py | 9 +++-- 9 files changed, 91 insertions(+), 22 deletions(-) diff --git a/infrastructure-provisioning/src/general/files/aws/tensor_Dockerfile b/infrastructure-provisioning/src/general/files/aws/tensor_Dockerfile index 74b14f2..091168d 100644 --- a/infrastructure-provisioning/src/general/files/aws/tensor_Dockerfile +++ b/infrastructure-provisioning/src/general/files/aws/tensor_Dockerfile @@ -37,6 +37,7 @@ COPY general/templates/os/pyspark_dataengine_template.json /root/templates/ COPY general/templates/os/${OS}/ungit.service /root/templates/ COPY general/templates/os/tensorboard.service /root/templates/ COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/ +COPY general/templates/os/sparkmagic_config_template.json /root/templates/ COPY general/templates/os/inactive.sh /root/templates/ COPY general/templates/os/inactive.service /root/templates/ COPY general/templates/os/inactive.timer /root/templates/ diff --git a/infrastructure-provisioning/src/general/files/azure/deeplearning_Dockerfile b/infrastructure-provisioning/src/general/files/azure/deeplearning_Dockerfile index d072a63..641734b 100644 --- a/infrastructure-provisioning/src/general/files/azure/deeplearning_Dockerfile +++ b/infrastructure-provisioning/src/general/files/azure/deeplearning_Dockerfile @@ -35,6 +35,7 @@ COPY general/templates/os/pyspark_local_template.json /root/templates/ COPY general/templates/os/py3spark_local_template.json /root/templates/ COPY general/templates/os/pyspark_dataengine_template.json /root/templates/ COPY general/templates/os/r_template.json /root/templates/ +COPY general/templates/os/sparkmagic_config_template.json /root/templates/ COPY general/templates/os/run_template.sh /root/templates/ COPY general/templates/os/tensorboard.service /root/templates/ COPY general/templates/os/inactive.sh /root/templates/ diff --git a/infrastructure-provisioning/src/general/files/azure/tensor_Dockerfile b/infrastructure-provisioning/src/general/files/azure/tensor_Dockerfile index 5b2ac3c..2e52b1c 100644 --- a/infrastructure-provisioning/src/general/files/azure/tensor_Dockerfile +++ b/infrastructure-provisioning/src/general/files/azure/tensor_Dockerfile @@ -34,6 +34,7 @@ COPY general/templates/os/notebook_spark-defaults_local.conf /root/templates/ COPY general/templates/os/pyspark_local_template.json /root/templates/ COPY general/templates/os/py3spark_local_template.json /root/templates/ COPY general/templates/os/pyspark_dataengine_template.json /root/templates/ +COPY general/templates/os/sparkmagic_config_template.json /root/templates/ COPY general/templates/os/${OS}/ungit.service /root/templates/ COPY general/templates/os/tensorboard.service /root/templates/ COPY general/templates/os/inactive.sh /root/templates/ diff --git a/infrastructure-provisioning/src/general/files/gcp/deeplearning_Dockerfile b/infrastructure-provisioning/src/general/files/gcp/deeplearning_Dockerfile index 2aa2ca4..cfce2f6 100644 --- a/infrastructure-provisioning/src/general/files/gcp/deeplearning_Dockerfile +++ b/infrastructure-provisioning/src/general/files/gcp/deeplearning_Dockerfile @@ -35,6 +35,7 @@ COPY general/templates/os/pyspark_local_template.json /root/templates/ COPY general/templates/os/py3spark_local_template.json /root/templates/ COPY general/templates/os/pyspark_dataengine_template.json /root/templates/ COPY general/templates/os/r_template.json /root/templates/ +COPY general/templates/os/sparkmagic_config_template.json /root/templates/ COPY general/templates/os/run_template.sh /root/templates/ COPY general/templates/os/tensorboard.service /root/templates/ COPY general/templates/os/inactive.sh /root/templates/ diff --git a/infrastructure-provisioning/src/general/files/gcp/tensor_Dockerfile b/infrastructure-provisioning/src/general/files/gcp/tensor_Dockerfile index 9f1e594..28cd54e 100644 --- a/infrastructure-provisioning/src/general/files/gcp/tensor_Dockerfile +++ b/infrastructure-provisioning/src/general/files/gcp/tensor_Dockerfile @@ -36,6 +36,7 @@ COPY general/templates/os/py3spark_local_template.json /root/templates/ COPY general/templates/os/pyspark_dataengine_template.json /root/templates/ COPY general/templates/os/tensorboard.service /root/templates/ COPY general/templates/os/pyspark_dataengine-service_template.json /root/templates/ +COPY general/templates/os/sparkmagic_config_template.json /root/templates/ COPY general/templates/os/inactive.sh /root/templates/ COPY general/templates/os/inactive.service /root/templates/ COPY general/templates/os/inactive.timer /root/templates/ diff --git a/infrastructure-provisioning/src/general/scripts/os/deeplearning_dataengine_create_configs.py b/infrastructure-provisioning/src/general/scripts/os/deeplearning_dataengine_create_configs.py index 924489d..5f8fa12 100644 --- a/infrastructure-provisioning/src/general/scripts/os/deeplearning_dataengine_create_configs.py +++ b/infrastructure-provisioning/src/general/scripts/os/deeplearning_dataengine_create_configs.py @@ -99,15 +99,46 @@ def pyspark_kernel(args): format(args.cluster_name, kernel_path, args.os_user)) local('sudo mv /tmp/{}/kernel_var.json '.format(args.cluster_name) + kernel_path) +def install_sparkamagic_kernels(args): + try: + local('sudo jupyter nbextension enable --py --sys-prefix widgetsnbextension') + sparkmagic_dir = local("sudo pip3 show sparkmagic | grep 'Location: ' | awk '{print $2}'", capture=True) + local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir)) + local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir)) + + pyspark_kernel_name = 'PySpark (Python-3.6 / Spark-{0} ) [{1}]'.format(args.spark_version, + args.cluster_name) + local('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format( + pyspark_kernel_name, args.os_user)) + scala_version = local('spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}"', capture=True) + spark_kernel_name = 'Spark (Scala-{0} / Spark-{1} ) [{2}]'.format(scala_version, args.spark_version, + args.cluster_name) + local('sed -i \'s|Spark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkkernel/kernel.json'.format( + spark_kernel_name, args.os_user)) + + local('sudo mv -f /home/{0}/.local/share/jupyter/kernels/pysparkkernel ' + '/home/{0}/.local/share/jupyter/kernels/pysparkkernel_{1}'.format(args.os_user, args.cluster_name)) + local('sudo mv -f /home/{0}/.local/share/jupyter/kernels/sparkkernel ' + '/home/{0}/.local/share/jupyter/kernels/sparkkernel_{1}'.format(args.os_user, args.cluster_name)) + + local('mkdir -p /home/' + args.os_user + '/.sparkmagic') + local('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json') + spark_master_ip = args.spark_master.split('//')[1].split(':')[0] + local('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format( + spark_master_ip, args.os_user)) + local('sudo chown -R {0}:{0} /home/{0}/.sparkmagic/'.format(args.os_user)) + except: + sys.exit(1) if __name__ == "__main__": if args.dry_run == 'true': parser.print_help() else: - dataengine_dir_prepare('/opt/{}/'.format(args.cluster_name)) - install_dataengine_spark(args.cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, args.os_user, - args.datalake_enabled) - ensure_dataengine_tensorflow_jars(local_jars_dir) - configure_dataengine_spark(args.cluster_name, local_jars_dir, cluster_dir, args.datalake_enabled, - args.spark_configurations) - pyspark_kernel(args) + install_sparkamagic_kernels(args) + #dataengine_dir_prepare('/opt/{}/'.format(args.cluster_name)) + #install_dataengine_spark(args.cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, args.os_user, + # args.datalake_enabled) + #ensure_dataengine_tensorflow_jars(local_jars_dir) + #configure_dataengine_spark(args.cluster_name, local_jars_dir, cluster_dir, args.datalake_enabled, + # args.spark_configurations) + #pyspark_kernel(args) diff --git a/infrastructure-provisioning/src/general/scripts/os/deeplearning_install_dataengine_kernels.py b/infrastructure-provisioning/src/general/scripts/os/deeplearning_install_dataengine_kernels.py index c01a907..8a5eaf7 100644 --- a/infrastructure-provisioning/src/general/scripts/os/deeplearning_install_dataengine_kernels.py +++ b/infrastructure-provisioning/src/general/scripts/os/deeplearning_install_dataengine_kernels.py @@ -45,14 +45,15 @@ def configure_notebook(keyfile, hoststring): templates_dir = '/root/templates/' scripts_dir = '/root/scripts/' run('mkdir -p /tmp/{}/'.format(args.cluster_name)) + put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json') if not exists('/tmp/deeplearning_dataengine_create_configs.py'): put(scripts_dir + 'deeplearning_dataengine_create_configs.py', '/tmp/deeplearning_dataengine_create_configs.py') - put(templates_dir + 'pyspark_dataengine_template.json', '/tmp/{}/pyspark_dataengine_template.json'.format(args.cluster_name)) - put(templates_dir + 'notebook_spark-defaults_local.conf', '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name)) + #put(templates_dir + 'pyspark_dataengine_template.json', '/tmp/{}/pyspark_dataengine_template.json'.format(args.cluster_name)) + #put(templates_dir + 'notebook_spark-defaults_local.conf', '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name)) spark_master_ip = args.spark_master.split('//')[1].split(':')[0] - spark_memory = get_spark_memory(True, args.os_user, spark_master_ip, keyfile) - run('echo "spark.executor.memory {0}m" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(spark_memory, args.cluster_name)) + #spark_memory = get_spark_memory(True, args.os_user, spark_master_ip, keyfile) + #run('echo "spark.executor.memory {0}m" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(spark_memory, args.cluster_name)) if not exists('/usr/local/bin/deeplearning_dataengine_create_configs.py'): put(scripts_dir + 'deeplearning_dataengine_create_configs.py', '/usr/local/bin/deeplearning_dataengine_create_configs.py', use_sudo=True) sudo('chmod 755 /usr/local/bin/deeplearning_dataengine_create_configs.py') diff --git a/infrastructure-provisioning/src/general/scripts/os/tensor_dataengine_create_configs.py b/infrastructure-provisioning/src/general/scripts/os/tensor_dataengine_create_configs.py index 2190879..38ca8ff 100644 --- a/infrastructure-provisioning/src/general/scripts/os/tensor_dataengine_create_configs.py +++ b/infrastructure-provisioning/src/general/scripts/os/tensor_dataengine_create_configs.py @@ -99,15 +99,46 @@ def pyspark_kernel(args): format(args.cluster_name, kernel_path, args.os_user)) local('sudo mv /tmp/{}/kernel_var.json '.format(args.cluster_name) + kernel_path) +def install_sparkamagic_kernels(args): + try: + local('sudo jupyter nbextension enable --py --sys-prefix widgetsnbextension') + sparkmagic_dir = local("sudo pip3 show sparkmagic | grep 'Location: ' | awk '{print $2}'", capture=True) + local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/sparkkernel --user'.format(sparkmagic_dir)) + local('sudo jupyter-kernelspec install {}/sparkmagic/kernels/pysparkkernel --user'.format(sparkmagic_dir)) + + pyspark_kernel_name = 'PySpark (Python-3.6 / Spark-{0} ) [{1}]'.format(args.spark_version, + args.cluster_name) + local('sed -i \'s|PySpark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/pysparkkernel/kernel.json'.format( + pyspark_kernel_name, args.os_user)) + scala_version = local('spark-submit --version 2>&1 | grep -o -P "Scala version \K.{0,7}"', capture=True) + spark_kernel_name = 'Spark (Scala-{0} / Spark-{1} ) [{2}]'.format(scala_version, args.spark_version, + args.cluster_name) + local('sed -i \'s|Spark|{0}|g\' /home/{1}/.local/share/jupyter/kernels/sparkkernel/kernel.json'.format( + spark_kernel_name, args.os_user)) + + local('sudo mv -f /home/{0}/.local/share/jupyter/kernels/pysparkkernel ' + '/home/{0}/.local/share/jupyter/kernels/pysparkkernel_{1}'.format(args.os_user, args.cluster_name)) + local('sudo mv -f /home/{0}/.local/share/jupyter/kernels/sparkkernel ' + '/home/{0}/.local/share/jupyter/kernels/sparkkernel_{1}'.format(args.os_user, args.cluster_name)) + + local('mkdir -p /home/' + args.os_user + '/.sparkmagic') + local('cp -f /tmp/sparkmagic_config_template.json /home/' + args.os_user + '/.sparkmagic/config.json') + spark_master_ip = args.spark_master.split('//')[1].split(':')[0] + local('sed -i \'s|LIVY_HOST|{0}|g\' /home/{1}/.sparkmagic/config.json'.format( + spark_master_ip, args.os_user)) + local('sudo chown -R {0}:{0} /home/{0}/.sparkmagic/'.format(args.os_user)) + except: + sys.exit(1) if __name__ == "__main__": if args.dry_run == 'true': parser.print_help() else: - dataengine_dir_prepare('/opt/{}/'.format(args.cluster_name)) - install_dataengine_spark(args.cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, args.os_user, - args.datalake_enabled) - ensure_dataengine_tensorflow_jars(local_jars_dir) - configure_dataengine_spark(args.cluster_name, local_jars_dir, cluster_dir, args.datalake_enabled, - args.spark_configurations) - pyspark_kernel(args) + install_sparkamagic_kernels(args) + #dataengine_dir_prepare('/opt/{}/'.format(args.cluster_name)) + #install_dataengine_spark(args.cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, args.os_user, + # args.datalake_enabled) + #ensure_dataengine_tensorflow_jars(local_jars_dir) + #configure_dataengine_spark(args.cluster_name, local_jars_dir, cluster_dir, args.datalake_enabled, + # args.spark_configurations) + #pyspark_kernel(args) diff --git a/infrastructure-provisioning/src/general/scripts/os/tensor_install_dataengine_kernels.py b/infrastructure-provisioning/src/general/scripts/os/tensor_install_dataengine_kernels.py index 40024ac..0ff9a0a 100644 --- a/infrastructure-provisioning/src/general/scripts/os/tensor_install_dataengine_kernels.py +++ b/infrastructure-provisioning/src/general/scripts/os/tensor_install_dataengine_kernels.py @@ -45,11 +45,12 @@ def configure_notebook(keyfile, hoststring): templates_dir = '/root/templates/' scripts_dir = '/root/scripts/' run('mkdir -p /tmp/{}/'.format(args.cluster_name)) - put(templates_dir + 'pyspark_dataengine_template.json', '/tmp/{}/pyspark_dataengine_template.json'.format(args.cluster_name)) - put(templates_dir + 'notebook_spark-defaults_local.conf', '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name)) + put(templates_dir + 'sparkmagic_config_template.json', '/tmp/sparkmagic_config_template.json') + #put(templates_dir + 'pyspark_dataengine_template.json', '/tmp/{}/pyspark_dataengine_template.json'.format(args.cluster_name)) + #put(templates_dir + 'notebook_spark-defaults_local.conf', '/tmp/{}/notebook_spark-defaults_local.conf'.format(args.cluster_name)) spark_master_ip = args.spark_master.split('//')[1].split(':')[0] - spark_memory = get_spark_memory(True, args.os_user, spark_master_ip, keyfile) - run('echo "spark.executor.memory {0}m" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(spark_memory, args.cluster_name)) + #spark_memory = get_spark_memory(True, args.os_user, spark_master_ip, keyfile) + #run('echo "spark.executor.memory {0}m" >> /tmp/{1}/notebook_spark-defaults_local.conf'.format(spark_memory, args.cluster_name)) if not exists('/usr/local/bin/tensor_dataengine_create_configs.py'): put(scripts_dir + 'tensor_dataengine_create_configs.py', '/usr/local/bin/tensor_dataengine_create_configs.py', use_sudo=True) sudo('chmod 755 /usr/local/bin/tensor_dataengine_create_configs.py') --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@dlab.apache.org For additional commands, e-mail: commits-h...@dlab.apache.org