This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch DATALAB-2998
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
The following commit(s) were added to refs/heads/DATALAB-2998 by this push:
new 23570d214 [DATALAB-2998]: fixed zeppelin hdinsight interpreter addition
23570d214 is described below
commit 23570d214708ed44bdb8565ed360527a4fb592b6
Author: leonidfrolov <[email protected]>
AuthorDate: Thu Sep 22 14:51:32 2022 +0300
[DATALAB-2998]: fixed zeppelin hdinsight interpreter addition
---
.../src/general/files/azure/zeppelin_Dockerfile | 1 +
.../src/general/lib/azure/actions_lib.py | 306 ++++++++++++---------
.../src/general/lib/os/fab.py | 2 +-
...common_notebook_configure_dataengine-service.py | 9 +-
.../scripts/azure/dataengine-service_terminate.py | 9 +
.../zeppelin_dataengine-service_create_configs.py | 4 +-
.../zeppelin_install_dataengine-service_kernels.py | 6 +-
.../azure/dataengine-service_interpreter_livy.json | 2 +-
8 files changed, 192 insertions(+), 147 deletions(-)
diff --git a/infrastructure-provisioning/src/general/files/azure/zeppelin_Dockerfile b/infrastructure-provisioning/src/general/files/azure/zeppelin_Dockerfile
index b2a0a6f90..46f01d148 100644
--- a/infrastructure-provisioning/src/general/files/azure/zeppelin_Dockerfile
+++ b/infrastructure-provisioning/src/general/files/azure/zeppelin_Dockerfile
@@ -28,6 +28,7 @@ COPY zeppelin/ /root/
COPY general/scripts/os/* /root/scripts/
COPY general/scripts/azure/zeppelin_* /root/scripts/
COPY general/lib/os/${OS}/notebook_lib.py /usr/lib/python3.8/datalab/notebook_lib.py
+COPY general/templates/azure/dataengine-service_interpreter_livy.json /root/templates/
COPY general/templates/azure/interpreter_livy.json /root/templates/
COPY general/templates/azure/interpreter_spark.json /root/templates/
COPY general/templates/os/dataengine_interpreter_livy.json /root/templates/
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index 63bdc793a..1e3c23275 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -1130,6 +1130,37 @@ class AzureActions:
conn.sudo('rm -rf
/home/{}/.ensure_dir/dataengine_{}_interpreter_ensured'.format(os_user,
cluster_name))
if exists(conn,
'/home/{}/.ensure_dir/rstudio_dataengine_ensured'.format(os_user)):
datalab.fab.remove_rstudio_dataengines_kernel(os.environ['computational_name'],
os_user)
+ if exists(conn,
'/home/{}/.ensure_dir/dataengine-service_{}_interpreter_ensured'.format(os_user,
+
cluster_name)):
+ conn.sudo("rm -rf
/home/{}/.ensure_dir/dataengine-service_interpreter_ensure".format(os_user))
+ zeppelin_url = 'http://' + private +
':8080/api/interpreter/setting/'
+ opener =
urllib.request.build_opener(urllib.request.ProxyHandler({}))
+ req = opener.open(urllib.request.Request(zeppelin_url))
+ r_text = req.read()
+ interpreter_json = json.loads(r_text)
+ interpreter_prefix = cluster_name
+ for interpreter in interpreter_json['body']:
+ if interpreter_prefix in interpreter['name']:
+ print("Interpreter with ID: {0} and name: {1} will be
removed from zeppelin!".
+ format(interpreter['id'], interpreter['name']))
+ request = urllib.request.Request(zeppelin_url +
interpreter['id'], data=''.encode())
+ request.get_method = lambda: 'DELETE'
+ url = opener.open(request)
+ print(url.read())
+ conn.sudo('chown ' + os_user + ':' + os_user + ' -R
/opt/zeppelin/')
+ conn.sudo('systemctl daemon-reload')
+ conn.sudo("service zeppelin-notebook stop")
+ conn.sudo("service zeppelin-notebook start")
+ zeppelin_restarted = False
+ while not zeppelin_restarted:
+ conn.sudo('sleep 5')
+ result = conn.sudo('nmap -p 8080 localhost | grep "closed"
> /dev/null; echo $?').stdout
+ result = result[:1]
+ if result == '1':
+ zeppelin_restarted = True
+ conn.sudo('sleep 5')
+ conn.sudo('rm -rf
/home/{}/.ensure_dir/dataengine-service_{}_interpreter_ensured'.format(os_user,
+
cluster_name))
conn.sudo('rm -rf /opt/' + cluster_name + '/')
print("Notebook's {} kernels were removed".format(private))
except Exception as err:
@@ -1209,145 +1240,146 @@ class AzureActions:
file=sys.stdout)}))
traceback.print_exc(file=sys.stdout)
- def configure_zeppelin_hdinsight_interpreter(self, cluster_name, os_user,
headnode_ip):
- try:
- # (self, emr_version, cluster_name, region, spark_dir, os_user,
yarn_dir, bucket,
- # user_name,
endpoint_url, multiple_emrs)
- # port_number_found = False
- # zeppelin_restarted = False
- default_port = 8998
- # get_cluster_python_version(region, bucket, user_name,
cluster_name)
- # with open('/tmp/python_version') as f:
- # python_version = f.read()
- # python_version = python_version[0:5]
- # livy_port = ''
- # livy_path = '/opt/{0}/{1}/livy/'.format(emr_version,
cluster_name)
- # spark_libs =
"/opt/{0}/jars/usr/share/aws/aws-java-sdk/aws-java-sdk-core*.jar " \
- # "/opt/{0}/jars/usr/lib/hadoop/hadoop-aws*.jar " \
- #
"/opt/{0}/jars/usr/share/aws/aws-java-sdk/aws-java-sdk-s3-*.jar " \
- #
"/opt/{0}/jars/usr/lib/hadoop-lzo/lib/hadoop-lzo-*.jar".format(emr_version)
- # # fix due to: Multiple py4j files found under
..../spark/python/lib
- # # py4j-0.10.7-src.zip still in folder. Versions may varies.
- # subprocess.run('rm
/opt/{0}/{1}/spark/python/lib/py4j-src.zip'.format(emr_version, cluster_name),
- # shell=True, check=True)
- #
- # subprocess.run('echo \"Configuring emr path for Zeppelin\"',
shell=True, check=True)
- # subprocess.run('sed -i \"s/^export SPARK_HOME.*/export
SPARK_HOME=\/opt\/{0}\/{1}\/spark/\" '
- #
'/opt/zeppelin/conf/zeppelin-env.sh'.format(emr_version, cluster_name),
shell=True,
+def configure_zeppelin_hdinsight_interpreter(cluster_name, os_user,
headnode_ip):
+ try:
+ # (self, emr_version, cluster_name, region, spark_dir, os_user,
yarn_dir, bucket,
+ # user_name, endpoint_url,
multiple_emrs)
+ # port_number_found = False
+ # zeppelin_restarted = False
+ default_port = '8998'
+ # get_cluster_python_version(region, bucket, user_name, cluster_name)
+ # with open('/tmp/python_version') as f:
+ # python_version = f.read()
+ # python_version = python_version[0:5]
+ # livy_port = ''
+ # livy_path = '/opt/{0}/{1}/livy/'.format(emr_version, cluster_name)
+ # spark_libs =
"/opt/{0}/jars/usr/share/aws/aws-java-sdk/aws-java-sdk-core*.jar " \
+ # "/opt/{0}/jars/usr/lib/hadoop/hadoop-aws*.jar " \
+ #
"/opt/{0}/jars/usr/share/aws/aws-java-sdk/aws-java-sdk-s3-*.jar " \
+ #
"/opt/{0}/jars/usr/lib/hadoop-lzo/lib/hadoop-lzo-*.jar".format(emr_version)
+ # # fix due to: Multiple py4j files found under ..../spark/python/lib
+ # # py4j-0.10.7-src.zip still in folder. Versions may varies.
+ # subprocess.run('rm
/opt/{0}/{1}/spark/python/lib/py4j-src.zip'.format(emr_version, cluster_name),
+ # shell=True, check=True)
+ #
+ # subprocess.run('echo \"Configuring emr path for Zeppelin\"',
shell=True, check=True)
+ # subprocess.run('sed -i \"s/^export SPARK_HOME.*/export
SPARK_HOME=\/opt\/{0}\/{1}\/spark/\" '
+ #
'/opt/zeppelin/conf/zeppelin-env.sh'.format(emr_version, cluster_name),
shell=True,
+ # check=True)
+ # subprocess.run('sed -i "s/^export HADOOP_CONF_DIR.*/export
HADOOP_CONF_DIR=' + \
+ # '\/opt\/{0}\/{1}\/conf/"
/opt/{0}/{1}/spark/conf/spark-env.sh'.format(emr_version,
+ #
cluster_name),
+ # shell=True, check=True)
+ # subprocess.run(
+ # 'echo \"spark.jars $(ls {0} | tr \'\\n\' \',\')\" >>
/opt/{1}/{2}/spark/conf/spark-defaults.conf'
+ # .format(spark_libs, emr_version, cluster_name), shell=True,
check=True)
+ # subprocess.run('sed -i "/spark.executorEnv.PYTHONPATH/d"
/opt/{0}/{1}/spark/conf/spark-defaults.conf'
+ # .format(emr_version, cluster_name), shell=True,
check=True)
+ # subprocess.run('sed -i "/spark.yarn.dist.files/d"
/opt/{0}/{1}/spark/conf/spark-defaults.conf'
+ # .format(emr_version, cluster_name), shell=True,
check=True)
+ # subprocess.run('sudo chown {0}:{0} -R
/opt/zeppelin/'.format(os_user), shell=True, check=True)
+ # subprocess.run('sudo systemctl daemon-reload', shell=True,
check=True)
+ # subprocess.run('sudo service zeppelin-notebook stop', shell=True,
check=True)
+ # subprocess.run('sudo service zeppelin-notebook start', shell=True,
check=True)
+ # while not zeppelin_restarted:
+ # subprocess.run('sleep 5', shell=True, check=True)
+ # result = subprocess.run('sudo bash -c "nmap -p 8080 localhost |
grep closed > /dev/null" ; echo $?',
+ # capture_output=True, shell=True,
check=True).stdout.decode('UTF-8').rstrip(
+ # "\n\r")
+ # result = result[:1]
+ # if result == '1':
+ # zeppelin_restarted = True
+ # subprocess.run('sleep 5', shell=True, check=True)
+ subprocess.run('echo \"Configuring HDinsight livy interpreter for
Zeppelin\"', shell=True, check=True)
+ if False: # multiple_emrs == 'true':
+ pass
+ # while not port_number_found:
+ # port_free = subprocess.run('sudo bash -c "nmap -p ' +
str(default_port) +
+ # ' localhost | grep closed >
/dev/null" ; echo $?', capture_output=True,
+ # shell=True,
check=True).stdout.decode('UTF-8').rstrip("\n\r")
+ # port_free = port_free[:1]
+ # if port_free == '0':
+ # livy_port = default_port
+ # port_number_found = True
+ # else:
+ # default_port += 1
+ # subprocess.run(
+ # 'sudo echo "livy.server.port = {0}" >>
{1}conf/livy.conf'.format(str(livy_port), livy_path),
+ # shell=True, check=True)
+ # subprocess.run('sudo echo "livy.spark.master = yarn" >>
{}conf/livy.conf'.format(livy_path), shell=True,
# check=True)
- # subprocess.run('sed -i "s/^export HADOOP_CONF_DIR.*/export
HADOOP_CONF_DIR=' + \
- # '\/opt\/{0}\/{1}\/conf/"
/opt/{0}/{1}/spark/conf/spark-env.sh'.format(emr_version,
- #
cluster_name),
- # shell=True, check=True)
+ # if
os.path.exists('{}conf/spark-blacklist.conf'.format(livy_path)):
+ # subprocess.run('sudo sed -i "s/^/#/g"
{}conf/spark-blacklist.conf'.format(livy_path), shell=True,
+ # check=True)
# subprocess.run(
- # 'echo \"spark.jars $(ls {0} | tr \'\\n\' \',\')\" >>
/opt/{1}/{2}/spark/conf/spark-defaults.conf'
- # .format(spark_libs, emr_version, cluster_name), shell=True,
check=True)
- # subprocess.run('sed -i "/spark.executorEnv.PYTHONPATH/d"
/opt/{0}/{1}/spark/conf/spark-defaults.conf'
- # .format(emr_version, cluster_name), shell=True,
check=True)
- # subprocess.run('sed -i "/spark.yarn.dist.files/d"
/opt/{0}/{1}/spark/conf/spark-defaults.conf'
- # .format(emr_version, cluster_name), shell=True,
check=True)
- # subprocess.run('sudo chown {0}:{0} -R
/opt/zeppelin/'.format(os_user), shell=True, check=True)
- # subprocess.run('sudo systemctl daemon-reload', shell=True,
check=True)
- # subprocess.run('sudo service zeppelin-notebook stop',
shell=True, check=True)
- # subprocess.run('sudo service zeppelin-notebook start',
shell=True, check=True)
- # while not zeppelin_restarted:
- # subprocess.run('sleep 5', shell=True, check=True)
- # result = subprocess.run('sudo bash -c "nmap -p 8080
localhost | grep closed > /dev/null" ; echo $?',
- # capture_output=True, shell=True,
check=True).stdout.decode('UTF-8').rstrip(
- # "\n\r")
- # result = result[:1]
- # if result == '1':
- # zeppelin_restarted = True
- # subprocess.run('sleep 5', shell=True, check=True)
- subprocess.run('echo \"Configuring emr spark interpreter for
Zeppelin\"', shell=True, check=True)
- if False: #multiple_emrs == 'true':
- pass
- # while not port_number_found:
- # port_free = subprocess.run('sudo bash -c "nmap -p ' +
str(default_port) +
- # ' localhost | grep closed >
/dev/null" ; echo $?', capture_output=True,
- # shell=True,
check=True).stdout.decode('UTF-8').rstrip("\n\r")
- # port_free = port_free[:1]
- # if port_free == '0':
- # livy_port = default_port
- # port_number_found = True
- # else:
- # default_port += 1
- # subprocess.run(
- # 'sudo echo "livy.server.port = {0}" >>
{1}conf/livy.conf'.format(str(livy_port), livy_path),
- # shell=True, check=True)
- # subprocess.run('sudo echo "livy.spark.master = yarn" >>
{}conf/livy.conf'.format(livy_path), shell=True,
- # check=True)
- # if
os.path.exists('{}conf/spark-blacklist.conf'.format(livy_path)):
- # subprocess.run('sudo sed -i "s/^/#/g"
{}conf/spark-blacklist.conf'.format(livy_path), shell=True,
- # check=True)
- # subprocess.run(
- # ''' sudo echo "export SPARK_HOME={0}" >>
{1}conf/livy-env.sh'''.format(spark_dir, livy_path),
- # shell=True, check=True)
- # subprocess.run(
- # ''' sudo echo "export HADOOP_CONF_DIR={0}" >>
{1}conf/livy-env.sh'''.format(yarn_dir, livy_path),
- # shell=True, check=True)
- # subprocess.run(''' sudo echo "export
PYSPARK3_PYTHON=python{0}" >> {1}conf/livy-env.sh'''.format(
- # python_version[0:3],
- # livy_path), shell=True, check=True)
- # template_file = "/tmp/dataengine-service_interpreter.json"
- # fr = open(template_file, 'r+')
- # text = fr.read()
- # text = text.replace('CLUSTER_NAME', cluster_name)
- # text = text.replace('SPARK_HOME', spark_dir)
- # text = text.replace('ENDPOINTURL', endpoint_url)
- # text = text.replace('LIVY_PORT', str(livy_port))
- # fw = open(template_file, 'w')
- # fw.write(text)
- # fw.close()
- # for _ in range(5):
- # try:
- # subprocess.run("curl --noproxy localhost -H
'Content-Type: application/json' -X POST -d " +
- #
"@/tmp/dataengine-service_interpreter.json
http://localhost:8080/api/interpreter/setting",
- # shell=True, check=True)
- # break
- # except:
- # subprocess.run('sleep 5', shell=True, check=True)
- # subprocess.run('sudo cp /opt/livy-server-cluster.service
/etc/systemd/system/livy-server-{}.service'
- # .format(str(livy_port)), shell=True,
check=True)
- # subprocess.run("sudo sed -i 's|OS_USER|{0}|'
/etc/systemd/system/livy-server-{1}.service"
- # .format(os_user, str(livy_port)), shell=True,
check=True)
- # subprocess.run("sudo sed -i 's|LIVY_PATH|{0}|'
/etc/systemd/system/livy-server-{1}.service"
- # .format(livy_path, str(livy_port)),
shell=True, check=True)
- # subprocess.run('sudo chmod 644
/etc/systemd/system/livy-server-{}.service'.format(str(livy_port)),
- # shell=True, check=True)
- # subprocess.run("sudo systemctl daemon-reload", shell=True,
check=True)
- # subprocess.run("sudo systemctl enable
livy-server-{}".format(str(livy_port)), shell=True, check=True)
- # subprocess.run('sudo systemctl start
livy-server-{}'.format(str(livy_port)), shell=True, check=True)
- else:
- template_file = "/tmp/dataengine-service_interpreter.json"
- fr = open(template_file, 'r+')
- text = fr.read()
- text = text.replace('CLUSTERNAME', cluster_name)
- text = text.replace('HEADNODEIP', headnode_ip)
- text = text.replace('PORT', default_port)
- # text = text.replace('PYTHONVERSION', p_version)
- # text = text.replace('SPARK_HOME', spark_dir)
- # text = text.replace('PYTHONVER_SHORT', p_version[:1])
- # text = text.replace('ENDPOINTURL', endpoint_url)
- # text = text.replace('DATAENGINE-SERVICE_VERSION',
emr_version)
- tmp_file = "/tmp/hdinsight_interpreter_livy.json"
- fw = open(tmp_file, 'w')
- fw.write(text)
- fw.close()
- for _ in range(5):
- try:
- subprocess.run("curl --noproxy localhost -H
'Content-Type: application/json' -X POST "
- "-d
@/tmp/hdinsight_interpreter_livy.json "
-
"http://localhost:8080/api/interpreter/setting",
- shell=True, check=True)
- break
- except:
- subprocess.run('sleep 5', shell=True, check=True)
- subprocess.run(
- 'touch /home/' + os_user + '/.ensure_dir/dataengine-service_'
+ cluster_name + '_interpreter_ensured',
- shell=True, check=True)
- except:
- sys.exit(1)
+ # ''' sudo echo "export SPARK_HOME={0}" >>
{1}conf/livy-env.sh'''.format(spark_dir, livy_path),
+ # shell=True, check=True)
+ # subprocess.run(
+ # ''' sudo echo "export HADOOP_CONF_DIR={0}" >>
{1}conf/livy-env.sh'''.format(yarn_dir, livy_path),
+ # shell=True, check=True)
+ # subprocess.run(''' sudo echo "export PYSPARK3_PYTHON=python{0}"
>> {1}conf/livy-env.sh'''.format(
+ # python_version[0:3],
+ # livy_path), shell=True, check=True)
+ # template_file = "/tmp/dataengine-service_interpreter.json"
+ # fr = open(template_file, 'r+')
+ # text = fr.read()
+ # text = text.replace('CLUSTER_NAME', cluster_name)
+ # text = text.replace('SPARK_HOME', spark_dir)
+ # text = text.replace('ENDPOINTURL', endpoint_url)
+ # text = text.replace('LIVY_PORT', str(livy_port))
+ # fw = open(template_file, 'w')
+ # fw.write(text)
+ # fw.close()
+ # for _ in range(5):
+ # try:
+ # subprocess.run("curl --noproxy localhost -H
'Content-Type: application/json' -X POST -d " +
+ #
"@/tmp/dataengine-service_interpreter.json
http://localhost:8080/api/interpreter/setting",
+ # shell=True, check=True)
+ # break
+ # except:
+ # subprocess.run('sleep 5', shell=True, check=True)
+ # subprocess.run('sudo cp /opt/livy-server-cluster.service
/etc/systemd/system/livy-server-{}.service'
+ # .format(str(livy_port)), shell=True, check=True)
+ # subprocess.run("sudo sed -i 's|OS_USER|{0}|'
/etc/systemd/system/livy-server-{1}.service"
+ # .format(os_user, str(livy_port)), shell=True,
check=True)
+ # subprocess.run("sudo sed -i 's|LIVY_PATH|{0}|'
/etc/systemd/system/livy-server-{1}.service"
+ # .format(livy_path, str(livy_port)), shell=True,
check=True)
+ # subprocess.run('sudo chmod 644
/etc/systemd/system/livy-server-{}.service'.format(str(livy_port)),
+ # shell=True, check=True)
+ # subprocess.run("sudo systemctl daemon-reload", shell=True,
check=True)
+ # subprocess.run("sudo systemctl enable
livy-server-{}".format(str(livy_port)), shell=True, check=True)
+ # subprocess.run('sudo systemctl start
livy-server-{}'.format(str(livy_port)), shell=True, check=True)
+ else:
+ template_file = "/tmp/dataengine-service_interpreter.json"
+ fr = open(template_file, 'r+')
+ text = fr.read()
+ text = text.replace('CLUSTERNAME', cluster_name)
+ text = text.replace('HEADNODEIP', headnode_ip)
+ text = text.replace('PORT', default_port)
+ # text = text.replace('PYTHONVERSION', p_version)
+ # text = text.replace('SPARK_HOME', spark_dir)
+ # text = text.replace('PYTHONVER_SHORT', p_version[:1])
+ # text = text.replace('ENDPOINTURL', endpoint_url)
+ # text = text.replace('DATAENGINE-SERVICE_VERSION', emr_version)
+ tmp_file = "/tmp/hdinsight_interpreter_livy.json"
+ fw = open(tmp_file, 'w')
+ fw.write(text)
+ fw.close()
+ for _ in range(5):
+ try:
+ subprocess.run("curl --noproxy localhost -H 'Content-Type:
application/json' -X POST "
+ "-d @/tmp/hdinsight_interpreter_livy.json "
+
"http://localhost:8080/api/interpreter/setting",
+ shell=True, check=True)
+ break
+ except:
+ subprocess.run('sleep 5', shell=True, check=True)
+ subprocess.run(
+ 'touch /home/' + os_user + '/.ensure_dir/dataengine-service_' +
cluster_name + '_interpreter_ensured',
+ shell=True, check=True)
+ except Exception as err:
+ traceback.print_exc(file=sys.stdout)
+ sys.exit(1)
def ensure_local_jars(os_user, jars_dir):
if not
exists(datalab.fab.conn,'/home/{}/.ensure_dir/local_jars_ensured'.format(os_user)):
diff --git a/infrastructure-provisioning/src/general/lib/os/fab.py b/infrastructure-provisioning/src/general/lib/os/fab.py
index 928a6b5c4..923fa3d89 100644
--- a/infrastructure-provisioning/src/general/lib/os/fab.py
+++ b/infrastructure-provisioning/src/general/lib/os/fab.py
@@ -1431,6 +1431,6 @@ def update_pyopenssl_lib(os_user):
def get_hdinsight_headnode_private_ip(os_user, cluster_name, keyfile):
init_datalab_connection('{}-ssh.azurehdinsight.net'.format(cluster_name),
os_user, keyfile)
- headnode_private_ip = conn.sudo("cat /etc/hosts | grep headnode | awk
'{print $1}'")
+ headnode_private_ip = conn.sudo("cat /etc/hosts | grep headnode | awk
'{print $1}'").stdout
conn.close()
return headnode_private_ip
diff --git a/infrastructure-provisioning/src/general/scripts/azure/common_notebook_configure_dataengine-service.py b/infrastructure-provisioning/src/general/scripts/azure/common_notebook_configure_dataengine-service.py
index f198ba49f..7a15da449 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/common_notebook_configure_dataengine-service.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/common_notebook_configure_dataengine-service.py
@@ -57,6 +57,7 @@ if __name__ == "__main__":
notebook_config['endpoint_name'] =
(os.environ['endpoint_name']).replace('_', '-').lower()
notebook_config['endpoint_tag'] = notebook_config['endpoint_name']
notebook_config['tag_name'] = notebook_config['service_base_name'] + '-tag'
+ notebook_config['computational_name'] =
os.environ['computational_name'].replace('_', '-').lower()
notebook_config['bucket_name'] =
'{0}-{1}-{2}-bucket'.format(notebook_config['service_base_name'],
notebook_config['project_name'],
notebook_config['endpoint_name'])
@@ -84,11 +85,11 @@ if __name__ == "__main__":
try:
logging.info('[INSTALLING KERNELS INTO SPECIFIED NOTEBOOK]')
params = "--bucket {} --cluster_name {} --hdinsight_version {}
--keyfile {} --notebook_ip {} --region {} " \
- "--edge_user_name {} --project_name {} --os_user {}
--edge_hostname {} --proxy_port {} " \
- "--scala_version {} --application {} --headnode_ip" \
+ "--project_name {} --os_user {} --edge_hostname {}
--proxy_port {} " \
+ "--scala_version {} --application {} --headnode_ip {}" \
.format(notebook_config['storage_account_name_tag'],
notebook_config['cluster_name'], os.environ['hdinsight_version'],
notebook_config['key_path'],
notebook_config['notebook_ip'], os.environ['gcp_region'],
- notebook_config['edge_user_name'],
notebook_config['project_name'], os.environ['conf_os_user'],
+ notebook_config['project_name'],
os.environ['conf_os_user'],
edge_instance_hostname, '3128',
os.environ['notebook_scala_version'], os.environ['application'],
notebook_config['headnode_ip'])
try:
@@ -99,7 +100,7 @@ if __name__ == "__main__":
raise Exception
except Exception as err:
clear_resources()
- datalab.fab.append_result("Failed installing Dataproc kernels.",
str(err))
+ datalab.fab.append_result("Failed installing HDinsight kernels.",
str(err))
sys.exit(1)
try:
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
index cf0fc316c..4a5a88b98 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -81,6 +81,15 @@ if __name__ == "__main__":
except:
sys.exit(1)
+ logging.info("[REMOVING NOTEBOOK KERNELS]")
+ try:
+
AzureActions.remove_dataengine_kernels(hdinsight_conf['resource_group_name'],
+
hdinsight_conf['notebook_instance_name'], os.environ['conf_os_user'],
+ hdinsight_conf['key_path'],
hdinsight_conf['cluster_name'])
+ except Exception as err:
+ datalab.fab.append_result("Failed to remove dataengine kernels from
notebook", str(err))
+ sys.exit(1)
+
try:
with open("/root/result.json", 'w') as result:
res = {"dataengine-service_name":
hdinsight_conf['computational_name'],
diff --git a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_dataengine-service_create_configs.py b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_dataengine-service_create_configs.py
index ad8cc6731..2ad68e997 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_dataengine-service_create_configs.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_dataengine-service_create_configs.py
@@ -23,9 +23,9 @@
import argparse
import subprocess
-from datalab.actions_lib import jars, yarn, install_hdinsight_spark,
spark_defaults, installing_python, configure_zeppelin_hdinsight_interpreter
+from datalab.actions_lib import configure_zeppelin_hdinsight_interpreter # ,
jars, yarn, install_hdinsight_spark, spark_defaults, installing_python
from datalab.common_lib import *
-from datalab.fab import configuring_notebook, update_zeppelin_interpreters
+# from datalab.fab import configuring_notebook, update_zeppelin_interpreters
from datalab.notebook_lib import *
from fabric import *
diff --git a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_install_dataengine-service_kernels.py
index 2ffbf2a7c..80211aed4 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/zeppelin_install_dataengine-service_kernels.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/zeppelin_install_dataengine-service_kernels.py
@@ -24,7 +24,9 @@
import argparse
import os
from datalab.meta_lib import *
+from datalab.fab import init_datalab_connection
from fabric import *
+from patchwork.files import exists
parser = argparse.ArgumentParser()
parser.add_argument('--bucket', type=str, default='')
@@ -51,7 +53,7 @@ def configure_notebook(args):
if os.environ['notebook_multiple_clusters'] == 'true':
conn.put(templates_dir + 'dataengine-service_interpreter_livy.json',
'/tmp/dataengine-service_interpreter.json')
else:
- conn.put(templates_dir + 'dataengine-service_interpreter_spark.json',
'/tmp/dataengine-service_interpreter.json')
+ conn.put(templates_dir + 'dataengine-service_interpreter_livy.json',
'/tmp/dataengine-service_interpreter.json')
conn.put('{}{}_dataengine-service_create_configs.py'.format(scripts_dir,
args.application),
'/tmp/zeppelin_dataengine-service_create_configs.py')
conn.sudo('\cp /tmp/zeppelin_dataengine-service_create_configs.py '
@@ -69,7 +71,7 @@ def configure_notebook(args):
if __name__ == "__main__":
global conn
- conn = datalab.fab.init_datalab_connection(args.notebook_ip, args.os_user,
args.keyfile)
+ conn = init_datalab_connection(args.notebook_ip, args.os_user,
args.keyfile)
configure_notebook(args)
spark_version = "None" #get_spark_version(args.cluster_name)
hadoop_version = "None" #get_hadoop_version(args.cluster_name)
diff --git a/infrastructure-provisioning/src/general/templates/azure/dataengine-service_interpreter_livy.json b/infrastructure-provisioning/src/general/templates/azure/dataengine-service_interpreter_livy.json
index 5bd79823e..1f5453f6b 100644
--- a/infrastructure-provisioning/src/general/templates/azure/dataengine-service_interpreter_livy.json
+++ b/infrastructure-provisioning/src/general/templates/azure/dataengine-service_interpreter_livy.json
@@ -5,7 +5,7 @@
"properties":{
"zeppelin.livy.url":{
"name":"zeppelin.livy.url",
- "value":"https://HEADNODEIP:PORT",
+ "value":"http://HEADNODEIP:PORT",
"type":"url",
"description":"The URL for Livy Server."
},
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]