This is an automated email from the ASF dual-hosted git repository.

lfrolov pushed a commit to branch DATALAB-1408
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit c8092d2e2f0c552a702ba285c3bd907b43311183
Author: leonidfrolov <[email protected]>
AuthorDate: Tue Aug 16 11:15:32 2022 +0300

    [DATALAB-2982]: added new files for hdinsight
---
 .../files/azure/dataengine-service_Dockerfile      | 39 +++++++++
 .../azure/dataengine-service_description.json      | 27 ++++++
 .../scripts/azure/dataengine-service_configure.py  | 60 +++++++++++++
 .../scripts/azure/dataengine-service_prepare.py    |  4 +-
 .../scripts/azure/dataengine-service_terminate.py  | 99 ++++++++++++++++++++++
 5 files changed, 227 insertions(+), 2 deletions(-)

diff --git a/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile b/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile
new file mode 100644
index 000000000..2b443239b
--- /dev/null
+++ b/infrastructure-provisioning/src/general/files/azure/dataengine-service_Dockerfile
@@ -0,0 +1,39 @@
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+FROM docker.datalab-base:latest
+
+ARG OS
+
+COPY dataengine-service/fabfile.py /root/
+COPY dataengine-service/description.json /root/
+COPY general/scripts/azure/dataengine-service_* /root/scripts/
+COPY general/lib/os/${OS}/notebook_lib.py /usr/lib/python3.8/datalab/notebook_lib.py
+COPY general/scripts/os/common_* /root/scripts/
+COPY general/scripts/os/install_additional_libs.py /root/scripts/install_additional_libs.py
+COPY general/scripts/os/get_list_available_pkgs.py /root/scripts/get_list_available_pkgs.py
+COPY general/templates/os/inactive.sh /root/templates/
+COPY general/templates/os/inactive.service /root/templates/
+COPY general/templates/os/inactive.timer /root/templates/
+
+RUN chmod a+x /root/fabfile.py; \
+    chmod a+x /root/scripts/*
+
diff --git a/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json b/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
new file mode 100644
index 000000000..d8ad248e5
--- /dev/null
+++ b/infrastructure-provisioning/src/general/files/azure/dataengine-service_description.json
@@ -0,0 +1,27 @@
+{
+  "template_name": "HDInsight cluster",
+  "description": "HDInsight cluster",
+  "environment_type": "computational",
+  "computation_resources_shapes":
+  {
+    "For testing" : [
+      {"Size": "S", "Description": "Standard_F4s", "Type": "Standard_F4s","Ram": "8.0 GB","Cpu": "4"}
+    ],
+    "Memory optimized" : [
+      {"Size": "S", "Description": "Standard_E4s_v3", "Type": "Standard_E4s_v3","Ram": "32 GB","Cpu": "4"},
+      {"Size": "M", "Description": "Standard_E16s_v3", "Type": "Standard_E16s_v3","Ram": "128 GB","Cpu": "16"},
+      {"Size": "L", "Description": "Standard_E32s_v3", "Type": "Standard_E32s_v3","Ram": "256 GB","Cpu": "32"}
+    ],
+    "Compute optimized": [
+      {"Size": "S", "Description": "Standard_F4s", "Type": "Standard_F4s","Ram": "8.0 GB","Cpu": "4"},
+      {"Size": "M", "Description": "Standard_F8s", "Type": "Standard_F8s","Ram": "16.0 GB","Cpu": "8"},
+      {"Size": "L", "Description": "Standard_F16s", "Type": "Standard_F16s","Ram": "32.0 GB","Cpu": "16"}
+    ],
+    "GPU optimized": [
+      {"Size": "S", "Description": "Standard_NC6", "Type": "Standard_NC6","Ram": "56.0 GB","Cpu": "6"}
+    ]
+  },
+  "templates":
+  [
+  ]
+}
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
index e69de29bb..9973c3d5c 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+import json
+import os
+import sys
+import traceback
+
+if __name__ == "__main__":
+    try:
+        # generating infrastructure names and tags
+        data_engine = dict()
+        data_engine['service_base_name'] = os.environ['conf_service_base_name']
+        data_engine['resource_group_name'] = os.environ['azure_resource_group_name']
+        data_engine['region'] = os.environ['azure_region']
+        data_engine['key_name'] = os.environ['conf_key_name']
+        data_engine['vpc_name'] = os.environ['azure_vpc_name']
+        data_engine['user_name'] = os.environ['edge_user_name']
+        data_engine['project_name'] = os.environ['project_name']
+        data_engine['project_tag'] = data_engine['project_name']
+        data_engine['endpoint_name'] = os.environ['endpoint_name']
+        data_engine['endpoint_tag'] = data_engine['endpoint_name']
+        if 'computational_name' in os.environ:
+            data_engine['computational_name'] = os.environ['computational_name']
+        else:
+            data_engine['computational_name'] = ''
+        # cluster_name has to be composed before the master node name is derived from it
+        data_engine['cluster_name'] = '{}-{}-{}-des-{}'.format(data_engine['service_base_name'],
+                                                               data_engine['project_name'],
+                                                               data_engine['endpoint_name'],
+                                                               data_engine['computational_name'])
+        data_engine['master_node_name'] = '{}-m'.format(data_engine['cluster_name'])
+        with open("/root/result.json", 'w') as result:
+            res = {"hostname": data_engine['cluster_name'],
+                   "instance_id": data_engine['master_node_name'],
+                   "key_name": data_engine['key_name'],
+                   "Action": "Create new HDInsight cluster",
+                   "computational_url": [
+                       {"description": "HDInsight cluster",
+                        "url": "spark_master_access_url"}
+                       # {"description": "Apache Spark Master (via tunnel)",
+                       #  "url": spark_master_url}
+                   ]
+                   }
+            result.write(json.dumps(res))
+    except Exception:
+        traceback.print_exc()
+        sys.exit(1)
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
index 140495368..177275f78 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_prepare.py
@@ -103,5 +103,5 @@ def create_cluster_parameters():
     )
 
 if __name__ == "__main__":
-    params = create_cluster_parameters()
-    create_hdinsight_cluster(RESOURCE_GROUP_NAME,CLUSTER_NAME, params)
\ No newline at end of file
+    #params = create_cluster_parameters()
+    #create_hdinsight_cluster(RESOURCE_GROUP_NAME,CLUSTER_NAME, params)
+    pass
\ No newline at end of file
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
new file mode 100644
index 000000000..02466e026
--- /dev/null
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_terminate.py
@@ -0,0 +1,99 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+import boto3
+import datalab.actions_lib
+import datalab.fab
+import datalab.meta_lib
+import json
+import os
+import sys
+import traceback
+from datalab.logger import logging
+
+
+def terminate_hdin_cluster(hdin_name, bucket_name, tag_name, nb_tag_value, ssh_user, key_path):
+    logging.info('Terminating hdin cluster and cleaning hdin config from S3 bucket')
+    # try:
+    #     clusters_list = datalab.meta_lib.get_hdin_list(hdin_name, 'Value')
+    #     if clusters_list:
+    #         for cluster_id in clusters_list:
+    #             computational_name = ''
+    #             client = boto3.client('hdin')
+    #             cluster = client.describe_cluster(ClusterId=cluster_id)
+    #             cluster = cluster.get("Cluster")
+    #             hdin_name = cluster.get('Name')
+    #             hdin_version = cluster.get('ReleaseLabel')
+    #             for tag in cluster.get('Tags'):
+    #                 if tag.get('Key') == 'ComputationalName':
+    #                     computational_name = tag.get('Value')
+    #             datalab.actions_lib.s3_cleanup(bucket_name, hdin_name, os.environ['project_name'])
+    #             print("The bucket {} has been cleaned successfully".format(bucket_name))
+    #             datalab.actions_lib.terminate_hdin(cluster_id)
+    #             print("The hdin cluster {} has been terminated successfully".format(hdin_name))
+    #             print("Removing hdin kernels from notebook")
+    #             datalab.actions_lib.remove_kernels(hdin_name, tag_name, nb_tag_value, ssh_user, key_path,
+    #                                                hdin_version, computational_name)
+    #     else:
+    #         logging.info("There are no hdin clusters to terminate.")
+    # except:
+    #     sys.exit(1)
+
+
+if __name__ == "__main__":
+    # generating variables dictionary
+    datalab.actions_lib.create_aws_config_files()
+    logging.info('Generating infrastructure names and tags')
+    hdin_conf = dict()
+    hdin_conf['service_base_name'] = (os.environ['conf_service_base_name'])
+    hdin_conf['hdin_name'] = os.environ['computational_name']
+    hdin_conf['notebook_name'] = os.environ['notebook_instance_name']
+    hdin_conf['project_name'] = os.environ['project_name']
+    hdin_conf['endpoint_name'] = os.environ['endpoint_name']
+    hdin_conf['bucket_name'] = '{0}-{1}-{2}-bucket'.format(hdin_conf['service_base_name'], hdin_conf['project_name'],
+                                                           hdin_conf['endpoint_name']).lower().replace('_', '-')
+    hdin_conf['key_path'] = os.environ['conf_key_dir'] + '/' + os.environ['conf_key_name'] + '.pem'
+    hdin_conf['tag_name'] = hdin_conf['service_base_name'] + '-tag'
+
+    # try:
+    #     logging.info('[TERMINATE hdin CLUSTER]')
+    #     try:
+    #         terminate_hdin_cluster(hdin_conf['hdin_name'], hdin_conf['bucket_name'], hdin_conf['tag_name'],
+    #                                hdin_conf['notebook_name'], os.environ['conf_os_user'], hdin_conf['key_path'])
+    #     except Exception as err:
+    #         traceback.print_exc()
+    #         datalab.fab.append_result("Failed to terminate hdin cluster.", str(err))
+    #         raise Exception
+    # except:
+    #     sys.exit(1)
+
+    try:
+        with open("/root/result.json", 'w') as result:
+ res = {"dataengine-service_name": hdin_conf['hdin_name'], + "notebook_name": hdin_conf['notebook_name'], + "Action": "Terminate HDInsight cluster"} + print(json.dumps(res)) + result.write(json.dumps(res)) + except Exception as err: + datalab.fab.append_result("Error with writing results", str(err)) + sys.exit(1) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
