[incubator-datalab] branch DATALAB-2998 updated: [DATALAB-2998]: fixed zeppelin hdinsight interpreter addition

lfrolov Thu, 22 Sep 2022 04:51:52 -0700

This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-2998
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git



The following commit(s) were added to refs/heads/DATALAB-2998 by this push:
     new 23570d214 [DATALAB-2998]: fixed zeppelin hdinsight interpreter addition
23570d214 is described below

commit 23570d214708ed44bdb8565ed360527a4fb592b6
Author: leonidfrolov <[email protected]>
AuthorDate: Thu Sep 22 14:51:32 2022 +0300

    [DATALAB-2998]: fixed zeppelin hdinsight interpreter addition
---
 .../src/general/files/azure/zeppelin_Dockerfile    |   1 +
 .../src/general/lib/azure/actions_lib.py           | 306 ++++++++++++---------
 .../src/general/lib/os/fab.py                      |   2 +-
 ...common_notebook_configure_dataengine-service.py |   9 +-
 .../scripts/azure/dataengine-service_terminate.py  |   9 +
 .../zeppelin_dataengine-service_create_configs.py  |   4 +-
 .../zeppelin_install_dataengine-service_kernels.py |   6 +-
 .../azure/dataengine-service_interpreter_livy.json |   2 +-
 8 files changed, 192 insertions(+), 147 deletions(-)

diff --git 
a/infrastructure-provisioning/src/general/files/azure/zeppelin_Dockerfile 
b/infrastructure-provisioning/src/general/files/azure/zeppelin_Dockerfile
index b2a0a6f90..46f01d148 100644
--- a/infrastructure-provisioning/src/general/files/azure/zeppelin_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/azure/zeppelin_Dockerfile
@@ -28,6 +28,7 @@ COPY zeppelin/ /root/
 COPY general/scripts/os/* /root/scripts/
 COPY general/scripts/azure/zeppelin_* /root/scripts/
 COPY general/lib/os/${OS}/notebook_lib.py 
/usr/lib/python3.8/datalab/notebook_lib.py
+COPY general/templates/azure/dataengine-service_interpreter_livy.json 
/root/templates/
 COPY general/templates/azure/interpreter_livy.json /root/templates/
 COPY general/templates/azure/interpreter_spark.json /root/templates/
 COPY general/templates/os/dataengine_interpreter_livy.json /root/templates/
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py 
b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index 63bdc793a..1e3c23275 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1130,6 +1130,37 @@ class AzureActions:
                 conn.sudo('rm -rf 
/home/{}/.ensure_dir/dataengine_{}_interpreter_ensured'.format(os_user, 
cluster_name))
             if exists(conn, 
'/home/{}/.ensure_dir/rstudio_dataengine_ensured'.format(os_user)):
                 
datalab.fab.remove_rstudio_dataengines_kernel(os.environ['computational_name'], 
os_user)
+            if exists(conn, 
'/home/{}/.ensure_dir/dataengine-service_{}_interpreter_ensured'.format(os_user,
+                                                                               
                     cluster_name)):
+                conn.sudo("rm -rf 
/home/{}/.ensure_dir/dataengine-service_interpreter_ensure".format(os_user))
+                zeppelin_url = 'http://' + private + 
':8080/api/interpreter/setting/'
+                opener = 
urllib.request.build_opener(urllib.request.ProxyHandler({}))
+                req = opener.open(urllib.request.Request(zeppelin_url))
+                r_text = req.read()
+                interpreter_json = json.loads(r_text)
+                interpreter_prefix = cluster_name
+                for interpreter in interpreter_json['body']:
+                    if interpreter_prefix in interpreter['name']:
+                        print("Interpreter with ID: {0} and name: {1} will be 
removed from zeppelin!".
+                              format(interpreter['id'], interpreter['name']))
+                        request = urllib.request.Request(zeppelin_url + 
interpreter['id'], data=''.encode())
+                        request.get_method = lambda: 'DELETE'
+                        url = opener.open(request)
+                        print(url.read())
+                conn.sudo('chown ' + os_user + ':' + os_user + ' -R 
/opt/zeppelin/')
+                conn.sudo('systemctl daemon-reload')
+                conn.sudo("service zeppelin-notebook stop")
+                conn.sudo("service zeppelin-notebook start")
+                zeppelin_restarted = False
+                while not zeppelin_restarted:
+                    conn.sudo('sleep 5')
+                    result = conn.sudo('nmap -p 8080 localhost | grep "closed" 
> /dev/null; echo $?').stdout
+                    result = result[:1]
+                    if result == '1':
+                        zeppelin_restarted = True
+                conn.sudo('sleep 5')
+                conn.sudo('rm -rf 
/home/{}/.ensure_dir/dataengine-service_{}_interpreter_ensured'.format(os_user,
+                                                                               
                          cluster_name))
             conn.sudo('rm -rf  /opt/' + cluster_name + '/')
             print("Notebook's {} kernels were removed".format(private))
         except Exception as err:
@@ -1209,145 +1240,146 @@ class AzureActions:
                                    file=sys.stdout)}))
             traceback.print_exc(file=sys.stdout)
 
-    def configure_zeppelin_hdinsight_interpreter(self, cluster_name, os_user, 
headnode_ip):
-        try:
-            # (self, emr_version, cluster_name, region, spark_dir, os_user, 
yarn_dir, bucket,
-            #                                            user_name, 
endpoint_url, multiple_emrs)
-            # port_number_found = False
-            # zeppelin_restarted = False
-            default_port = 8998
-            # get_cluster_python_version(region, bucket, user_name, 
cluster_name)
-            # with open('/tmp/python_version') as f:
-            #     python_version = f.read()
-            # python_version = python_version[0:5]
-            # livy_port = ''
-            # livy_path = '/opt/{0}/{1}/livy/'.format(emr_version, 
cluster_name)
-            # spark_libs = 
"/opt/{0}/jars/usr/share/aws/aws-java-sdk/aws-java-sdk-core*.jar " \
-            #              "/opt/{0}/jars/usr/lib/hadoop/hadoop-aws*.jar " \
-            #              
"/opt/{0}/jars/usr/share/aws/aws-java-sdk/aws-java-sdk-s3-*.jar " \
-            #              
"/opt/{0}/jars/usr/lib/hadoop-lzo/lib/hadoop-lzo-*.jar".format(emr_version)
-            # # fix due to: Multiple py4j files found under 
..../spark/python/lib
-            # # py4j-0.10.7-src.zip still in folder. Versions may varies.
-            # subprocess.run('rm 
/opt/{0}/{1}/spark/python/lib/py4j-src.zip'.format(emr_version, cluster_name),
-            #                shell=True, check=True)
-            #
-            # subprocess.run('echo \"Configuring emr path for Zeppelin\"', 
shell=True, check=True)
-            # subprocess.run('sed -i \"s/^export SPARK_HOME.*/export 
SPARK_HOME=\/opt\/{0}\/{1}\/spark/\" '
-            #                
'/opt/zeppelin/conf/zeppelin-env.sh'.format(emr_version, cluster_name), 
shell=True,
+def configure_zeppelin_hdinsight_interpreter(cluster_name, os_user, 
headnode_ip):
+    try:
+        # (self, emr_version, cluster_name, region, spark_dir, os_user, 
yarn_dir, bucket,
+        #                                            user_name, endpoint_url, 
multiple_emrs)
+        # port_number_found = False
+        # zeppelin_restarted = False
+        default_port = '8998'
+        # get_cluster_python_version(region, bucket, user_name, cluster_name)
+        # with open('/tmp/python_version') as f:
+        #     python_version = f.read()
+        # python_version = python_version[0:5]
+        # livy_port = ''
+        # livy_path = '/opt/{0}/{1}/livy/'.format(emr_version, cluster_name)
+        # spark_libs = 
"/opt/{0}/jars/usr/share/aws/aws-java-sdk/aws-java-sdk-core*.jar " \
+        #              "/opt/{0}/jars/usr/lib/hadoop/hadoop-aws*.jar " \
+        #              
"/opt/{0}/jars/usr/share/aws/aws-java-sdk/aws-java-sdk-s3-*.jar " \
+        #              
"/opt/{0}/jars/usr/lib/hadoop-lzo/lib/hadoop-lzo-*.jar".format(emr_version)
+        # # fix due to: Multiple py4j files found under ..../spark/python/lib
+        # # py4j-0.10.7-src.zip still in folder. Versions may varies.
+        # subprocess.run('rm 
/opt/{0}/{1}/spark/python/lib/py4j-src.zip'.format(emr_version, cluster_name),
+        #                shell=True, check=True)
+        #
+        # subprocess.run('echo \"Configuring emr path for Zeppelin\"', 
shell=True, check=True)
+        # subprocess.run('sed -i \"s/^export SPARK_HOME.*/export 
SPARK_HOME=\/opt\/{0}\/{1}\/spark/\" '
+        #                
'/opt/zeppelin/conf/zeppelin-env.sh'.format(emr_version, cluster_name), 
shell=True,
+        #                check=True)
+        # subprocess.run('sed -i "s/^export HADOOP_CONF_DIR.*/export 
HADOOP_CONF_DIR=' + \
+        #                '\/opt\/{0}\/{1}\/conf/" 
/opt/{0}/{1}/spark/conf/spark-env.sh'.format(emr_version,
+        #                                                                      
                cluster_name),
+        #                shell=True, check=True)
+        # subprocess.run(
+        #     'echo \"spark.jars $(ls {0} | tr \'\\n\' \',\')\" >> 
/opt/{1}/{2}/spark/conf/spark-defaults.conf'
+        #     .format(spark_libs, emr_version, cluster_name), shell=True, 
check=True)
+        # subprocess.run('sed -i "/spark.executorEnv.PYTHONPATH/d" 
/opt/{0}/{1}/spark/conf/spark-defaults.conf'
+        #                .format(emr_version, cluster_name), shell=True, 
check=True)
+        # subprocess.run('sed -i "/spark.yarn.dist.files/d" 
/opt/{0}/{1}/spark/conf/spark-defaults.conf'
+        #                .format(emr_version, cluster_name), shell=True, 
check=True)
+        # subprocess.run('sudo chown {0}:{0} -R 
/opt/zeppelin/'.format(os_user), shell=True, check=True)
+        # subprocess.run('sudo systemctl daemon-reload', shell=True, 
check=True)
+        # subprocess.run('sudo service zeppelin-notebook stop', shell=True, 
check=True)
+        # subprocess.run('sudo service zeppelin-notebook start', shell=True, 
check=True)
+        # while not zeppelin_restarted:
+        #     subprocess.run('sleep 5', shell=True, check=True)
+        #     result = subprocess.run('sudo bash -c "nmap -p 8080 localhost | 
grep closed > /dev/null" ; echo $?',
+        #                             capture_output=True, shell=True, 
check=True).stdout.decode('UTF-8').rstrip(
+        #         "\n\r")
+        #     result = result[:1]
+        #     if result == '1':
+        #         zeppelin_restarted = True
+        # subprocess.run('sleep 5', shell=True, check=True)
+        subprocess.run('echo \"Configuring HDinsight livy interpreter for 
Zeppelin\"', shell=True, check=True)
+        if False:  # multiple_emrs == 'true':
+            pass
+            # while not port_number_found:
+            #     port_free = subprocess.run('sudo bash -c "nmap -p ' + 
str(default_port) +
+            #                                ' localhost | grep closed > 
/dev/null" ; echo $?', capture_output=True,
+            #                                shell=True, 
check=True).stdout.decode('UTF-8').rstrip("\n\r")
+            #     port_free = port_free[:1]
+            #     if port_free == '0':
+            #         livy_port = default_port
+            #         port_number_found = True
+            #     else:
+            #         default_port += 1
+            # subprocess.run(
+            #     'sudo echo "livy.server.port = {0}" >> 
{1}conf/livy.conf'.format(str(livy_port), livy_path),
+            #     shell=True, check=True)
+            # subprocess.run('sudo echo "livy.spark.master = yarn" >> 
{}conf/livy.conf'.format(livy_path), shell=True,
             #                check=True)
-            # subprocess.run('sed -i "s/^export HADOOP_CONF_DIR.*/export 
HADOOP_CONF_DIR=' + \
-            #                '\/opt\/{0}\/{1}\/conf/" 
/opt/{0}/{1}/spark/conf/spark-env.sh'.format(emr_version,
-            #                                                                  
                    cluster_name),
-            #                shell=True, check=True)
+            # if 
os.path.exists('{}conf/spark-blacklist.conf'.format(livy_path)):
+            #     subprocess.run('sudo sed -i "s/^/#/g" 
{}conf/spark-blacklist.conf'.format(livy_path), shell=True,
+            #                    check=True)
             # subprocess.run(
-            #     'echo \"spark.jars $(ls {0} | tr \'\\n\' \',\')\" >> 
/opt/{1}/{2}/spark/conf/spark-defaults.conf'
-            #     .format(spark_libs, emr_version, cluster_name), shell=True, 
check=True)
-            # subprocess.run('sed -i "/spark.executorEnv.PYTHONPATH/d" 
/opt/{0}/{1}/spark/conf/spark-defaults.conf'
-            #                .format(emr_version, cluster_name), shell=True, 
check=True)
-            # subprocess.run('sed -i "/spark.yarn.dist.files/d" 
/opt/{0}/{1}/spark/conf/spark-defaults.conf'
-            #                .format(emr_version, cluster_name), shell=True, 
check=True)
-            # subprocess.run('sudo chown {0}:{0} -R 
/opt/zeppelin/'.format(os_user), shell=True, check=True)
-            # subprocess.run('sudo systemctl daemon-reload', shell=True, 
check=True)
-            # subprocess.run('sudo service zeppelin-notebook stop', 
shell=True, check=True)
-            # subprocess.run('sudo service zeppelin-notebook start', 
shell=True, check=True)
-            # while not zeppelin_restarted:
-            #     subprocess.run('sleep 5', shell=True, check=True)
-            #     result = subprocess.run('sudo bash -c "nmap -p 8080 
localhost | grep closed > /dev/null" ; echo $?',
-            #                             capture_output=True, shell=True, 
check=True).stdout.decode('UTF-8').rstrip(
-            #         "\n\r")
-            #     result = result[:1]
-            #     if result == '1':
-            #         zeppelin_restarted = True
-            # subprocess.run('sleep 5', shell=True, check=True)
-            subprocess.run('echo \"Configuring emr spark interpreter for 
Zeppelin\"', shell=True, check=True)
-            if False: #multiple_emrs == 'true':
-                pass
-                # while not port_number_found:
-                #     port_free = subprocess.run('sudo bash -c "nmap -p ' + 
str(default_port) +
-                #                                ' localhost | grep closed > 
/dev/null" ; echo $?', capture_output=True,
-                #                                shell=True, 
check=True).stdout.decode('UTF-8').rstrip("\n\r")
-                #     port_free = port_free[:1]
-                #     if port_free == '0':
-                #         livy_port = default_port
-                #         port_number_found = True
-                #     else:
-                #         default_port += 1
-                # subprocess.run(
-                #     'sudo echo "livy.server.port = {0}" >> 
{1}conf/livy.conf'.format(str(livy_port), livy_path),
-                #     shell=True, check=True)
-                # subprocess.run('sudo echo "livy.spark.master = yarn" >> 
{}conf/livy.conf'.format(livy_path), shell=True,
-                #                check=True)
-                # if 
os.path.exists('{}conf/spark-blacklist.conf'.format(livy_path)):
-                #     subprocess.run('sudo sed -i "s/^/#/g" 
{}conf/spark-blacklist.conf'.format(livy_path), shell=True,
-                #                    check=True)
-                # subprocess.run(
-                #     ''' sudo echo "export SPARK_HOME={0}" >> 
{1}conf/livy-env.sh'''.format(spark_dir, livy_path),
-                #     shell=True, check=True)
-                # subprocess.run(
-                #     ''' sudo echo "export HADOOP_CONF_DIR={0}" >> 
{1}conf/livy-env.sh'''.format(yarn_dir, livy_path),
-                #     shell=True, check=True)
-                # subprocess.run(''' sudo echo "export 
PYSPARK3_PYTHON=python{0}" >> {1}conf/livy-env.sh'''.format(
-                #     python_version[0:3],
-                #     livy_path), shell=True, check=True)
-                # template_file = "/tmp/dataengine-service_interpreter.json"
-                # fr = open(template_file, 'r+')
-                # text = fr.read()
-                # text = text.replace('CLUSTER_NAME', cluster_name)
-                # text = text.replace('SPARK_HOME', spark_dir)
-                # text = text.replace('ENDPOINTURL', endpoint_url)
-                # text = text.replace('LIVY_PORT', str(livy_port))
-                # fw = open(template_file, 'w')
-                # fw.write(text)
-                # fw.close()
-                # for _ in range(5):
-                #     try:
-                #         subprocess.run("curl --noproxy localhost -H 
'Content-Type: application/json' -X POST -d " +
-                #                        
"@/tmp/dataengine-service_interpreter.json 
http://localhost:8080/api/interpreter/setting",
-                #                        shell=True, check=True)
-                #         break
-                #     except:
-                #         subprocess.run('sleep 5', shell=True, check=True)
-                # subprocess.run('sudo cp /opt/livy-server-cluster.service 
/etc/systemd/system/livy-server-{}.service'
-                #                .format(str(livy_port)), shell=True, 
check=True)
-                # subprocess.run("sudo sed -i 's|OS_USER|{0}|' 
/etc/systemd/system/livy-server-{1}.service"
-                #                .format(os_user, str(livy_port)), shell=True, 
check=True)
-                # subprocess.run("sudo sed -i 's|LIVY_PATH|{0}|' 
/etc/systemd/system/livy-server-{1}.service"
-                #                .format(livy_path, str(livy_port)), 
shell=True, check=True)
-                # subprocess.run('sudo chmod 644 
/etc/systemd/system/livy-server-{}.service'.format(str(livy_port)),
-                #                shell=True, check=True)
-                # subprocess.run("sudo systemctl daemon-reload", shell=True, 
check=True)
-                # subprocess.run("sudo systemctl enable 
livy-server-{}".format(str(livy_port)), shell=True, check=True)
-                # subprocess.run('sudo systemctl start 
livy-server-{}'.format(str(livy_port)), shell=True, check=True)
-            else:
-                template_file = "/tmp/dataengine-service_interpreter.json"
-                fr = open(template_file, 'r+')
-                text = fr.read()
-                text = text.replace('CLUSTERNAME', cluster_name)
-                text = text.replace('HEADNODEIP', headnode_ip)
-                text = text.replace('PORT', default_port)
-                    # text = text.replace('PYTHONVERSION', p_version)
-                    # text = text.replace('SPARK_HOME', spark_dir)
-                    # text = text.replace('PYTHONVER_SHORT', p_version[:1])
-                    # text = text.replace('ENDPOINTURL', endpoint_url)
-                    # text = text.replace('DATAENGINE-SERVICE_VERSION', 
emr_version)
-                tmp_file = "/tmp/hdinsight_interpreter_livy.json"
-                fw = open(tmp_file, 'w')
-                fw.write(text)
-                fw.close()
-                for _ in range(5):
-                    try:
-                        subprocess.run("curl --noproxy localhost -H 
'Content-Type: application/json' -X POST "
-                                       "-d 
@/tmp/hdinsight_interpreter_livy.json "
-                                       
"http://localhost:8080/api/interpreter/setting",
-                                       shell=True, check=True)
-                        break
-                    except:
-                        subprocess.run('sleep 5', shell=True, check=True)
-            subprocess.run(
-                'touch /home/' + os_user + '/.ensure_dir/dataengine-service_' 
+ cluster_name + '_interpreter_ensured',
-                shell=True, check=True)
-        except:
-            sys.exit(1)
+            #     ''' sudo echo "export SPARK_HOME={0}" >> 
{1}conf/livy-env.sh'''.format(spark_dir, livy_path),
+            #     shell=True, check=True)
+            # subprocess.run(
+            #     ''' sudo echo "export HADOOP_CONF_DIR={0}" >> 
{1}conf/livy-env.sh'''.format(yarn_dir, livy_path),
+            #     shell=True, check=True)
+            # subprocess.run(''' sudo echo "export PYSPARK3_PYTHON=python{0}" 
>> {1}conf/livy-env.sh'''.format(
+            #     python_version[0:3],
+            #     livy_path), shell=True, check=True)
+            # template_file = "/tmp/dataengine-service_interpreter.json"
+            # fr = open(template_file, 'r+')
+            # text = fr.read()
+            # text = text.replace('CLUSTER_NAME', cluster_name)
+            # text = text.replace('SPARK_HOME', spark_dir)
+            # text = text.replace('ENDPOINTURL', endpoint_url)
+            # text = text.replace('LIVY_PORT', str(livy_port))
+            # fw = open(template_file, 'w')
+            # fw.write(text)
+            # fw.close()
+            # for _ in range(5):
+            #     try:
+            #         subprocess.run("curl --noproxy localhost -H 
'Content-Type: application/json' -X POST -d " +
+            #                        
"@/tmp/dataengine-service_interpreter.json 
http://localhost:8080/api/interpreter/setting",
+            #                        shell=True, check=True)
+            #         break
+            #     except:
+            #         subprocess.run('sleep 5', shell=True, check=True)
+            # subprocess.run('sudo cp /opt/livy-server-cluster.service 
/etc/systemd/system/livy-server-{}.service'
+            #                .format(str(livy_port)), shell=True, check=True)
+            # subprocess.run("sudo sed -i 's|OS_USER|{0}|' 
/etc/systemd/system/livy-server-{1}.service"
+            #                .format(os_user, str(livy_port)), shell=True, 
check=True)
+            # subprocess.run("sudo sed -i 's|LIVY_PATH|{0}|' 
/etc/systemd/system/livy-server-{1}.service"
+            #                .format(livy_path, str(livy_port)), shell=True, 
check=True)
+            # subprocess.run('sudo chmod 644 
/etc/systemd/system/livy-server-{}.service'.format(str(livy_port)),
+            #                shell=True, check=True)
+            # subprocess.run("sudo systemctl daemon-reload", shell=True, 
check=True)
+            # subprocess.run("sudo systemctl enable 
livy-server-{}".format(str(livy_port)), shell=True, check=True)
+            # subprocess.run('sudo systemctl start 
livy-server-{}'.format(str(livy_port)), shell=True, check=True)
+        else:
+            template_file = "/tmp/dataengine-service_interpreter.json"
+            fr = open(template_file, 'r+')
+            text = fr.read()
+            text = text.replace('CLUSTERNAME', cluster_name)
+            text = text.replace('HEADNODEIP', headnode_ip)
+            text = text.replace('PORT', default_port)
+            # text = text.replace('PYTHONVERSION', p_version)
+            # text = text.replace('SPARK_HOME', spark_dir)
+            # text = text.replace('PYTHONVER_SHORT', p_version[:1])
+            # text = text.replace('ENDPOINTURL', endpoint_url)
+            # text = text.replace('DATAENGINE-SERVICE_VERSION', emr_version)
+            tmp_file = "/tmp/hdinsight_interpreter_livy.json"
+            fw = open(tmp_file, 'w')
+            fw.write(text)
+            fw.close()
+            for _ in range(5):
+                try:
+                    subprocess.run("curl --noproxy localhost -H 'Content-Type: 
application/json' -X POST "
+                                   "-d @/tmp/hdinsight_interpreter_livy.json "
+                                   
"http://localhost:8080/api/interpreter/setting",
+                                   shell=True, check=True)
+                    break
+                except:
+                    subprocess.run('sleep 5', shell=True, check=True)
+        subprocess.run(
+            'touch /home/' + os_user + '/.ensure_dir/dataengine-service_' + 
cluster_name + '_interpreter_ensured',
+            shell=True, check=True)
+    except Exception as err:
+        traceback.print_exc(file=sys.stdout)
+        sys.exit(1)
 
 def ensure_local_jars(os_user, jars_dir):
     if not 
exists(datalab.fab.conn,'/home/{}/.ensure_dir/local_jars_ensured'.format(os_user)):
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py 
b/infrastructure-provisioning/src/general/lib/os/fab.py
index 928a6b5c4..923fa3d89 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -1431,6 +1431,6 @@ def update_pyopenssl_lib(os_user):
 
 def get_hdinsight_headnode_private_ip(os_user, cluster_name, keyfile):
     init_datalab_connection('{}-ssh.azurehdinsight.net'.format(cluster_name), 
os_user, keyfile)
-    headnode_private_ip = conn.sudo("cat /etc/hosts | grep headnode | awk 
'{print $1}'")
+    headnode_private_ip = conn.sudo("cat /etc/hosts | grep headnode | awk 
'{print $1}'").stdout
     conn.close()
     return headnode_private_ip
diff --git 
a/infrastructure-provisioning/src/general/scripts/azure/common_notebook_configure_dataengine-service.py
 
b/infrastructure-provisioning/src/general/scripts/azure/common_notebook_configure_dataengine-service.py
index f198ba49f..7a15da449 100644
--- 
a/infrastructure-provisioning/src/general/scripts/azure/common_notebook_configure_dataengine-service.py
+++ 
b/infrastructure-provisioning/src/general/scripts/azure/common_notebook_configure_dataengine-service.py
@@ -57,6 +57,7 @@ if __name__ == "__main__":
     notebook_config['endpoint_name'] = 
(os.environ['endpoint_name']).replace('_', '-').lower()
     notebook_config['endpoint_tag'] = notebook_config['endpoint_name']
     notebook_config['tag_name'] = notebook_config['service_base_name'] + '-tag'
+    notebook_config['computational_name'] = 
os.environ['computational_name'].replace('_', '-').lower()
     notebook_config['bucket_name'] = 
'{0}-{1}-{2}-bucket'.format(notebook_config['service_base_name'],
                                                                  
notebook_config['project_name'],
                                                                  
notebook_config['endpoint_name'])
@@ -84,11 +85,11 @@ if __name__ == "__main__":
     try:
         logging.info('[INSTALLING KERNELS INTO SPECIFIED NOTEBOOK]')
         params = "--bucket {} --cluster_name {} --hdinsight_version {} 
--keyfile {} --notebook_ip {} --region {} " \
-                 "--edge_user_name {} --project_name {} --os_user {}  
--edge_hostname {} --proxy_port {} " \
-                 "--scala_version {} --application {} --headnode_ip" \
+                 "--project_name {} --os_user {}  --edge_hostname {} 
--proxy_port {} " \
+                 "--scala_version {} --application {} --headnode_ip {}" \
             .format(notebook_config['storage_account_name_tag'], 
notebook_config['cluster_name'], os.environ['hdinsight_version'],
                     notebook_config['key_path'], 
notebook_config['notebook_ip'], os.environ['gcp_region'],
-                    notebook_config['edge_user_name'], 
notebook_config['project_name'], os.environ['conf_os_user'],
+                    notebook_config['project_name'], 
os.environ['conf_os_user'],
                     edge_instance_hostname, '3128', 
os.environ['notebook_scala_version'], os.environ['application'],
                     notebook_config['headnode_ip'])
         try:
@@ -99,7 +100,7 @@ if __name__ == "__main__":
             raise Exception
     except Exception as err:
         clear_resources()
-        datalab.fab.append_result("Failed installing Dataproc kernels.", 
str(err))
+        datalab.fab.append_result("Failed installing HDinsight kernels.", 
str(err))
         sys.exit(1)
 
     try:
diff --git 
a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
 
b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
index cf0fc316c..4a5a88b98 100644
--- 
a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
+++ 
b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -81,6 +81,15 @@ if __name__ == "__main__":
     except:
         sys.exit(1)
 
+    logging.info("[REMOVING NOTEBOOK KERNELS]")
+    try:
+        
AzureActions.remove_dataengine_kernels(hdinsight_conf['resource_group_name'],
+                                               
hdinsight_conf['notebook_instance_name'], os.environ['conf_os_user'],
+                                               hdinsight_conf['key_path'], 
hdinsight_conf['cluster_name'])
+    except Exception as err:
+        datalab.fab.append_result("Failed to remove dataengine kernels from 
notebook", str(err))
+        sys.exit(1)
+
     try:
         with open("/root/result.json", 'w') as result:
             res = {"dataengine-service_name": 
hdinsight_conf['computational_name'],
diff --git 
a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_dataengine-service_create_configs.py
 
b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_dataengine-service_create_configs.py
index ad8cc6731..2ad68e997 100644
--- 
a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_dataengine-service_create_configs.py
+++ 
b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_dataengine-service_create_configs.py
@@ -23,9 +23,9 @@
 
 import argparse
 import subprocess
-from datalab.actions_lib import jars, yarn, install_hdinsight_spark, 
spark_defaults, installing_python, configure_zeppelin_hdinsight_interpreter
+from datalab.actions_lib import configure_zeppelin_hdinsight_interpreter  # , 
jars, yarn, install_hdinsight_spark, spark_defaults, installing_python
 from datalab.common_lib import *
-from datalab.fab import configuring_notebook, update_zeppelin_interpreters
+# from datalab.fab import configuring_notebook, update_zeppelin_interpreters
 from datalab.notebook_lib import *
 from fabric import *
 
diff --git 
a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_install_dataengine-service_kernels.py
 
b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_install_dataengine-service_kernels.py
index 2ffbf2a7c..80211aed4 100644
--- 
a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_install_dataengine-service_kernels.py
+++ 
b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_install_dataengine-service_kernels.py
@@ -24,7 +24,9 @@
 import argparse
 import os
 from datalab.meta_lib import *
+from datalab.fab import init_datalab_connection
 from fabric import *
+from patchwork.files import exists
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--bucket', type=str, default='')
@@ -51,7 +53,7 @@ def configure_notebook(args):
     if os.environ['notebook_multiple_clusters'] == 'true':
         conn.put(templates_dir + 'dataengine-service_interpreter_livy.json', 
'/tmp/dataengine-service_interpreter.json')
     else:
-        conn.put(templates_dir + 'dataengine-service_interpreter_spark.json', 
'/tmp/dataengine-service_interpreter.json')
+        conn.put(templates_dir + 'dataengine-service_interpreter_livy.json', 
'/tmp/dataengine-service_interpreter.json')
     conn.put('{}{}_dataengine-service_create_configs.py'.format(scripts_dir, 
args.application),
              '/tmp/zeppelin_dataengine-service_create_configs.py')
     conn.sudo('\cp /tmp/zeppelin_dataengine-service_create_configs.py '
@@ -69,7 +71,7 @@ def configure_notebook(args):
 
 if __name__ == "__main__":
     global conn
-    conn = datalab.fab.init_datalab_connection(args.notebook_ip, args.os_user, 
args.keyfile)
+    conn = init_datalab_connection(args.notebook_ip, args.os_user, 
args.keyfile)
     configure_notebook(args)
     spark_version = "None"  #get_spark_version(args.cluster_name)
     hadoop_version = "None"  #get_hadoop_version(args.cluster_name)
diff --git 
a/infrastructure-provisioning/src/general/templates/azure/dataengine-service_interpreter_livy.json
 
b/infrastructure-provisioning/src/general/templates/azure/dataengine-service_interpreter_livy.json
index 5bd79823e..1f5453f6b 100644
--- 
a/infrastructure-provisioning/src/general/templates/azure/dataengine-service_interpreter_livy.json
+++ 
b/infrastructure-provisioning/src/general/templates/azure/dataengine-service_interpreter_livy.json
@@ -5,7 +5,7 @@
    "properties":{
       "zeppelin.livy.url":{
          "name":"zeppelin.livy.url",
-         "value":"https://HEADNODEIP:PORT",
+         "value":"http://HEADNODEIP:PORT",
          "type":"url",
          "description":"The URL for Livy Server."
       },


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[incubator-datalab] branch DATALAB-2998 updated: [DATALAB-2998]: fixed zeppelin hdinsight interpreter addition

Reply via email to