This is an automated email from the ASF dual-hosted git repository. mykolabodnar pushed a commit to branch DATALAB-2409 in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
commit 66eb986f6220430c89829ec6349df35b1fc3046c Author: bodnarmykola <[email protected]> AuthorDate: Wed Jul 21 14:46:49 2021 +0300 [DATALAB-2409] - DataLab deployment script refactored --- .../scripts/deploy_datalab.py | 399 +++++++++++++-------- .../src/base/scripts/install_prerequisites.py | 13 - .../src/general/conf/datalab.ini | 4 - .../src/general/files/aws/base_Dockerfile | 3 +- .../src/general/files/azure/base_Dockerfile | 3 +- .../src/general/files/gcp/base_Dockerfile | 3 +- .../src/general/files/os/debian/sources.list | 56 --- .../src/general/files/os/redhat/sources.list | 83 ----- .../src/general/lib/os/debian/common_lib.py | 7 +- .../src/general/lib/os/redhat/common_lib.py | 7 - ...common_notebook_configure_dataengine-service.py | 4 +- .../src/general/scripts/aws/ssn_prepare.py | 15 +- ...common_notebook_configure_dataengine-service.py | 5 +- infrastructure-provisioning/src/ssn/fabfile.py | 8 +- .../src/ssn/scripts/configure_docker.py | 7 - 15 files changed, 256 insertions(+), 361 deletions(-) diff --git a/infrastructure-provisioning/scripts/deploy_datalab.py b/infrastructure-provisioning/scripts/deploy_datalab.py index e3bdd4c..cf322a6 100644 --- a/infrastructure-provisioning/scripts/deploy_datalab.py +++ b/infrastructure-provisioning/scripts/deploy_datalab.py @@ -20,162 +20,236 @@ # # ****************************************************************************** +"""Examples How to deploy DataLab for different cloud providers. 
+ +``GCP`` example:: + + $ infrastructure-provisioning/scripts/deploy_datalab.py \ + --conf_service_base_name <SERVICE_NAME> \ + --conf_os_family debian \ + --action create \ + --key_path /home/ubuntu/.ssh \ + --conf_key_name gcp \ + --billing_dataset_name billing \ + gcp \ + --gcp_ssn_instance_size n1-standard-2 \ + --gcp_project_id <PROJECT_ID>\ + --gcp_service_account_path /home/ubuntu/secret.json\ + --gcp_region us-west1\ + --gcp_zone us-west1-a + +``AWS`` example:: + + $ infrastructure-provisioning/scripts/deploy_datalab.py\ + --conf_service_base_name datalab-test\ + --conf_os_family debian\ + --action create \ + --key_path /path/to/key/\ + --conf_key_name key_name\ + --conf_tag_resource_id datalab\ + aws\ + --aws_vpc_id vpc-xxxxx\ + --aws_subnet_id subnet-xxxxx\ + --aws_security_groups_ids sg-xxxxx,sg-xxxx\ + --aws_access_key XXXXXXX\ + --aws_secret_access_key XXXXXXXXXX\ + --aws_region xx-xxxxx-x\ + --aws_account_id xxxxxxxx\ + --aws_billing_bucket billing_bucket\ + --aws_report_path /billing/directory/\ + +""" import argparse import os import subprocess -from fabric import * -from invoke import task - -parser = argparse.ArgumentParser() -parser.add_argument('--conf_service_base_name', type=str, help='unique name for DataLab environment') -parser.add_argument('--conf_network_type', type=str, default='', - help='Define in which network DataLab will be deployed. ' - 'Possible options: public|private') -parser.add_argument('--conf_vpc_cidr', type=str, default='', help='CIDR of VPC') -parser.add_argument('--conf_vpc2_cidr', type=str, default='', help='CIDR of secondary VPC') -parser.add_argument('--conf_allowed_ip_cidr', type=str, default='', help='Comma-separated CIDR of IPs which will have ' - 'access to SSN') -parser.add_argument('--conf_user_subnets_range', type=str, default='', help='Range of subnets which will be using for ' - 'users environments. 
For example: ' - '10.10.0.0/24 - 10.10.10.0/24') -parser.add_argument('--conf_private_subnet_prefix', type=str, default='24', help='Private subnet prefix') -parser.add_argument('--conf_additional_tags', type=str, default='', help='Additional tags in format ' - '"Key1:Value1;Key2:Value2"') -parser.add_argument('--conf_image_enabled', type=str, default='', help='Enable or Disable creating image at first time') -parser.add_argument('--aws_user_predefined_s3_policies', type=str, default='', help='Predefined policies for users ' - 'instances') -parser.add_argument('--aws_access_key', type=str, default='', help='AWS Access Key ID') -parser.add_argument('--aws_secret_access_key', type=str, default='', help='AWS Secret Access Key') -parser.add_argument('--aws_region', type=str, default='', help='AWS region') -parser.add_argument('--aws_zone', type=str, default='', help='AWS zone') -parser.add_argument('--azure_region', type=str, default='', help='Azure region') -parser.add_argument('--gcp_region', type=str, default='', help='GCP region') -parser.add_argument('--gcp_zone', type=str, default='', help='GCP zone') -parser.add_argument('--conf_os_family', type=str, default='', - help='Operating system type. Available options: debian, redhat') -parser.add_argument('--conf_cloud_provider', type=str, default='', - help='Where DataLab should be deployed. 
Available options: aws, azure, gcp') -parser.add_argument('--ssn_hosted_zone_name', type=str, default='', help='Name of hosted zone') -parser.add_argument('--ssn_hosted_zone_id', type=str, default='', help='ID of hosted zone') -parser.add_argument('--ssn_subdomain', type=str, default='', help='Subdomain name') -parser.add_argument('--ssn_assume_role_arn', type=str, default='', help='Role ARN for creating Route53 record in ' - 'different AWS account') -parser.add_argument('--ssl_cert_path', type=str, default='', help='Full path to SSL certificate') -parser.add_argument('--ssl_key_path', type=str, default='', help='Full path to key for SSL certificate') -parser.add_argument('--aws_vpc_id', type=str, default='', help='AWS VPC ID') -parser.add_argument('--conf_duo_vpc_enable', type=str, default='false', help='Duo VPC scheme enable(true|false)') -parser.add_argument('--aws_vpc2_id', type=str, default='', help='Secondary AWS VPC ID') -parser.add_argument('--aws_peering_id', type=str, default='', help='Amazon peering connection id') -parser.add_argument('--azure_vpc_name', type=str, default='', help='Azure VPC Name') -parser.add_argument('--gcp_vpc_name', type=str, default='', help='GCP VPC Name') -parser.add_argument('--aws_subnet_id', type=str, default='', help='AWS Subnet ID') -parser.add_argument('--azure_subnet_name', type=str, default='', help='Azure Subnet Name') -parser.add_argument('--gcp_subnet_name', type=str, default='', help='GCP Subnet Name') -parser.add_argument('--aws_security_groups_ids', type=str, default='', help='One of more comma-separated Security ' - 'groups IDs for SSN') -parser.add_argument('--azure_security_group_name', type=str, default='', help='One Security' - 'group name for SSN') -parser.add_argument('--azure_edge_security_group_name', type=str, default='', help='One Security ' - 'group name for Edge node') -parser.add_argument('--gcp_firewall_name', type=str, default='', help='One of more comma-separated GCP Firewall rules ' - 'for SSN') 
-parser.add_argument('--key_path', type=str, default='', help='Path to admin key (WITHOUT KEY NAME)') -parser.add_argument('--conf_key_name', type=str, default='', help='Admin key name (WITHOUT ".pem")') -parser.add_argument('--workspace_path', type=str, default='', help='Admin key name (WITHOUT ".pem")') -parser.add_argument('--conf_tag_resource_id', type=str, default='datalab', help='The name of user tag') -parser.add_argument('--conf_billing_tag', type=str, default='datalab', help='Billing tag') -parser.add_argument('--aws_ssn_instance_size', type=str, default='t2.large', help='The SSN instance shape') -parser.add_argument('--azure_ssn_instance_size', type=str, default='Standard_DS2_v2', help='The SSN instance shape') -parser.add_argument('--gcp_ssn_instance_size', type=str, default='n1-standard-2', help='The SSN instance shape') -parser.add_argument('--aws_account_id', type=str, default='', help='The ID of Amazon account') -parser.add_argument('--aws_billing_bucket', type=str, default='', help='The name of S3 bucket where billing reports ' - 'will be placed.') -parser.add_argument('--aws_job_enabled', type=str, default='false', help='Billing format. 
Available options: ' - 'true (aws), false(epam)') -parser.add_argument('--aws_report_path', type=str, default='', help='The path to billing reports directory in S3 ' - 'bucket') -parser.add_argument('--azure_resource_group_name', type=str, default='', help='Name of Resource group in Azure') -parser.add_argument('--azure_auth_path', type=str, default='', help='Full path to Azure credentials JSON file') -parser.add_argument('--azure_datalake_enable', type=str, default='', help='Provision DataLake storage account') -parser.add_argument('--azure_ad_group_id', type=str, default='', help='ID of Azure AD group') -parser.add_argument('--azure_offer_number', type=str, default='', help='Azure offer number') -parser.add_argument('--azure_currency', type=str, default='', help='Azure currency code') -parser.add_argument('--azure_locale', type=str, default='', help='Azure locale') -parser.add_argument('--azure_application_id', type=str, default='', help='Azure login application ID') -parser.add_argument('--azure_validate_permission_scope', type=str, default='true', help='Azure permission scope ' - 'validation(true|false).') -parser.add_argument('--azure_oauth2_enabled', type=str, default='false', help='Using OAuth2 for logging in DataLab') -parser.add_argument('--azure_region_info', type=str, default='', help='Azure region info') -parser.add_argument('--azure_source_vpc_name', type=str, default='', help='Azure VPC source Name') -parser.add_argument('--azure_source_resource_group_name', type=str, default='', help='Azure source resource group') -parser.add_argument('--gcp_project_id', type=str, default='', help='The project ID in Google Cloud Platform') -parser.add_argument('--gcp_service_account_path', type=str, default='', help='The project ID in Google Cloud Platform') -parser.add_argument('--datalab_id', type=str, default="'resource_tags_user_user_tag'", - help='Column name in report file that contains ' - 'datalab id tag') -parser.add_argument('--usage_date', type=str, 
default='line_item_usage_start_date', - help='Column name in report file that contains ' - 'usage date tag') -parser.add_argument('--product', type=str, default='product_product_name', - help='Column name in report file that contains ' - 'product name tag') -parser.add_argument('--usage_type', type=str, default='line_item_usage_type', - help='Column name in report file that contains ' - 'usage type tag') -parser.add_argument('--usage', type=str, default='line_item_usage_amount', - help='Column name in report file that contains ' - 'usage tag') -parser.add_argument('--cost', type=str, default='line_item_blended_cost', - help='Column name in report file that contains cost tag') -parser.add_argument('--resource_id', type=str, default='line_item_resource_id', - help='Column name in report file that contains ' - 'datalab resource id tag') -parser.add_argument('--ldap_hostname', type=str, default='localhost', help='Ldap instance hostname') -parser.add_argument('--ldap_dn', type=str, default='dc=example,dc=com', - help='Ldap distinguished name') -parser.add_argument('--ldap_ou', type=str, default='ou=People', help='Ldap organisation unit') -parser.add_argument('--ldap_service_username', type=str, default='cn=service-user', help='Ldap service user name') -parser.add_argument('--ldap_service_password', type=str, default='service-user-password', - help='Ldap password for admin user') -parser.add_argument('--keycloak_realm_name', type=str, default='datalab', help='Keycloak Realm name') -parser.add_argument('--keycloak_auth_server_url', type=str, default='datalab', help='Keycloak auth server URL') -parser.add_argument('--keycloak_client_name', type=str, default='datalab', help='Keycloak client name') -parser.add_argument('--keycloak_client_secret', type=str, default='datalab', help='Keycloak client secret') -parser.add_argument('--keycloak_user', type=str, default='datalab', help='Keycloak user') -parser.add_argument('--keycloak_user_password', type=str, 
default='keycloak-user-password', - help='Keycloak user password') -parser.add_argument('--tags', type=str, default='line_item_operation,line_item_line_item_description', - help='Column name in report file that ' - 'contains tags') -parser.add_argument('--billing_dataset_name', type=str, default='', help='Name of GCP dataset (BigQuery service)' - ' for billing') -parser.add_argument('--default_endpoint_name', type=str, default='local', help='Name of localhost provisioning service,' - 'that created by default') -parser.add_argument('--conf_stepcerts_enabled', type=str, default='false', help='Enable or disable step certificates') -parser.add_argument('--conf_stepcerts_root_ca', type=str, default='', help='Step root CA') -parser.add_argument('--conf_stepcerts_kid', type=str, default='', help='Step KID') -parser.add_argument('--conf_stepcerts_kid_password', type=str, default='', help='Step KID password') -parser.add_argument('--conf_stepcerts_ca_url', type=str, default='', help='Step CA URL') -parser.add_argument('--conf_letsencrypt_enabled', type=str, default='false', - help='Enable or disable Let`s Encrypt certificates') -parser.add_argument('--conf_repository_user', type=str, default='', - help='user to access repository (used for jars download)') -parser.add_argument('--conf_release_tag', type=str, default='2.5', - help='tag used for jars download') -parser.add_argument('--conf_repository_pass', type=str, default='', - help='password to access repository (used for jars download)') -parser.add_argument('--conf_repository_address', type=str, default='', - help='address to access repository (used for jars download)') -parser.add_argument('--conf_letsencrypt_domain_name', type=str, default='', help='Domain names to apply. 
' - 'For multiple domains enter a comma separated list of domains as a parameter' - 'ssn.domain_name will be used for ssn_node, DNS A record have to exist during deployment') -parser.add_argument('--conf_letsencrypt_email', type=str, default='', help='Email that will be entered during ' - 'certificate obtaining and can be user for urgent renewal and security notices. ' - 'Use comma to register multiple emails, e.g. [email protected],[email protected].') -parser.add_argument('--action', required=True, type=str, default='', choices=['build', 'deploy', 'create', 'terminate'], - help='Available options: build, deploy, create, terminate') -args = parser.parse_args() +import sys + +BOOL_CHOICES_LIST = ['true', 'false'] +OS_DISTRO_LIST = ['debian', 'redhat'] +NETWORK_TYPE_LIST = ['public', 'private'] + + +def build_parser(): + parser = argparse.ArgumentParser(description='DataLab Self-Service Node deployment', + prog='deploy_datalab') + # optional arguments + parser.add_argument('--conf_network_type', type=str, default='public', + help='''Type of network. Define in which network DataLab will be deployed. + (valid choices: %s)''' % NETWORK_TYPE_LIST, + choices=NETWORK_TYPE_LIST) + parser.add_argument('--conf_vpc_cidr', type=str, default='172.31.0.0/16', help='CIDR of VPC') + parser.add_argument('--conf_vpc2_cidr', type=str, help='CIDR of secondary VPC') + parser.add_argument('--conf_allowed_ip_cidr', type=str, default='0.0.0.0/0', + help='Comma-separated CIDR of IPs which will have access to SSN') + parser.add_argument('--conf_user_subnets_range', type=str, + help='''Range of subnets which will be using for users environments. 
+ For example: 10.10.0.0/24 - 10.10.10.0/24''') + parser.add_argument('--conf_private_subnet_prefix', type=str, default='24', help='Private subnet prefix') + parser.add_argument('--conf_additional_tags', type=str, + help='Additional tags in format "Key1:Value1;Key2:Value2"') + parser.add_argument('--conf_image_enabled', type=str, + help='Enable or Disable creating image at first time') + parser.add_argument('--conf_os_family', type=str, default='debian', choices=OS_DISTRO_LIST, + help='Operating system distribution. (valid choices: %s)' % OS_DISTRO_LIST) + parser.add_argument('--ssn_hosted_zone_name', type=str, help='Name of hosted zone') + parser.add_argument('--ssn_hosted_zone_id', type=str, help='ID of hosted zone') + parser.add_argument('--ssn_subdomain', type=str, help='Subdomain name') + parser.add_argument('--ssl_cert_path', type=str, help='Full path to SSL certificate') + parser.add_argument('--ssl_key_path', type=str, help='Full path to key for SSL certificate') + parser.add_argument('--workspace_path', type=str, default='', help='Docker workspace path') + parser.add_argument('--conf_tag_resource_id', type=str, default='datalab', help='The name of user tag') + parser.add_argument('--conf_billing_tag', type=str, default='datalab', help='Billing tag') + parser.add_argument('--datalab_id', type=str, default='resource_tags_user_user_tag', + help='Column name in report file that contains datalab id tag') + parser.add_argument('--usage_date', type=str, default='line_item_usage_start_date', + help='Column name in report file that contains usage date tag') + parser.add_argument('--product', type=str, default='product_product_name', + help='Column name in report file that contains product name tag') + parser.add_argument('--usage_type', type=str, default='line_item_usage_type', + help='Column name in report file that contains usage type tag') + parser.add_argument('--usage', type=str, default='line_item_usage_amount', + help='Column name in report file that 
contains usage tag') + parser.add_argument('--cost', type=str, default='line_item_blended_cost', + help='Column name in report file that contains cost tag') + parser.add_argument('--resource_id', type=str, default='line_item_resource_id', + help='Column name in report file that contains datalab resource id tag') + + parser.add_argument('--tags', type=str, default='line_item_operation,line_item_line_item_description', + help='Column name in report file that contains tags') + parser.add_argument('--conf_stepcerts_enabled', type=str, default='false', + help='Enable or disable step certificates. (valid choices: %s)' % BOOL_CHOICES_LIST, + choices=BOOL_CHOICES_LIST) + parser.add_argument('--conf_stepcerts_root_ca', type=str, help='Step root CA') + parser.add_argument('--conf_stepcerts_kid', type=str, help='Step KID') + parser.add_argument('--conf_stepcerts_kid_password', type=str, help='Step KID password') + parser.add_argument('--conf_stepcerts_ca_url', type=str, help='Step CA URL') + parser.add_argument('--conf_letsencrypt_enabled', type=str, default='false', + help='Enable or disable Let`s Encrypt certificates. (valid choices: %s)' % BOOL_CHOICES_LIST, + choices=BOOL_CHOICES_LIST) + parser.add_argument('--conf_letsencrypt_domain_name', type=str, + help='''Domain names to apply. For multiple domains enter a comma separated list of domains + as a parameter. ssn.domain_name will be used for ssn_node, DNS A record has to exist during deployment''') + parser.add_argument('--conf_letsencrypt_email', type=str, help='''Email that will be entered during + certificate obtaining and can be used for urgent renewal and security notices. Use comma to register + multiple emails, e.g. 
[email protected],[email protected].''') + parser.add_argument('--conf_repository_user', type=str, default='', + help='user to access repository (used for jars download)') + parser.add_argument('--conf_release_tag', type=str, default='2.5', + help='tag used for jars download') + parser.add_argument('--conf_repository_pass', type=str, default='', + help='password to access repository (used for jars download)') + parser.add_argument('--conf_repository_address', type=str, default='', + help='address to access repository (used for jars download)') + + required_args = parser.add_argument_group('Required arguments') + required_args.add_argument('--conf_service_base_name', type=str, + help='Unique name for DataLab environment', required=True) + required_args.add_argument('--action', type=str, help='Action to perform', + choices=['build', 'deploy', 'create', 'terminate'], required=True) + required_args.add_argument('--key_path', type=str, help='Path to admin key (WITHOUT KEY NAME)', required=True) + required_args.add_argument('--conf_key_name', type=str, help='Admin key name (WITHOUT ".pem")', required=True) + required_args.add_argument('--keycloak_auth_server_url', type=str, default='datalab', + help='Keycloak auth server URL', required=True) + required_args.add_argument('--keycloak_realm_name', type=str, help='Keycloak Realm name', required=True) + required_args.add_argument('--keycloak_client_name', type=str, default='datalab', + help='Keycloak client name', required=True) + required_args.add_argument('--keycloak_client_secret', type=str, default='datalab', + help='Keycloak client secret', required=True) + required_args.add_argument('--keycloak_user', type=str, default='datalab', help='Keycloak user', required=True) + required_args.add_argument('--keycloak_user_password', type=str, default='keycloak-user-password', + help='Keycloak user password', required=True) + required_args.add_argument('--default_endpoint_name', type=str, default='local', + help='Name of localhost 
provisioning service, that created by default', required=True) + + # subparsers + subparsers = parser.add_subparsers(dest='conf_cloud_provider', required=True, help='sub-command help', + description='''These are the subcommands for deploying resources + in a specific cloud provider''') + + # --------- aws subcommand ---------------------- + aws_parser = subparsers.add_parser('aws') + aws_parser.add_argument('--aws_user_predefined_s3_policies', type=str, + help='Predefined policies for users instances') + aws_parser.add_argument('--aws_access_key', type=str, + help='''AWS Access Key ID. Required in case of deployment with IAM user. DataLab + deployment script is executed on local machine and uses + IAM user permissions to create resources in AWS.''') + aws_parser.add_argument('--aws_secret_access_key', type=str, help='AWS Secret Access Key') + aws_parser.add_argument('--ssn_assume_role_arn', type=str, + help='Role ARN for creating Route53 record in different AWS account') + aws_parser.add_argument('--aws_vpc_id', type=str, help='AWS VPC ID') + aws_parser.add_argument('--conf_duo_vpc_enable', type=str, default='false', + help='Duo VPC scheme enable. (valid choices: %s)' % BOOL_CHOICES_LIST, + choices=BOOL_CHOICES_LIST) + aws_parser.add_argument('--aws_vpc2_id', type=str, help='Secondary AWS VPC ID') + aws_parser.add_argument('--aws_peering_id', type=str, help='Amazon peering connection id') + aws_parser.add_argument('--aws_subnet_id', type=str, help='AWS Subnet ID') + aws_parser.add_argument('--aws_security_groups_ids', type=str, + help='One or more comma-separated Security groups IDs for SSN') + aws_parser.add_argument('--aws_billing_bucket', type=str, + help='The name of S3 bucket where billing reports will be placed.') + aws_parser.add_argument('--aws_job_enabled', type=str, default='false', choices=BOOL_CHOICES_LIST, + help='Billing format. 
(valid choices: %s)' % BOOL_CHOICES_LIST) + aws_parser.add_argument('--aws_report_path', type=str, help='The path to billing reports directory in S3 bucket') + + aws_required_args = aws_parser.add_argument_group('Required arguments') + aws_required_args.add_argument('--aws_region', type=str, required=True, help='AWS region') + aws_required_args.add_argument('--aws_zone', type=str, required=True, help='AWS zone') + aws_required_args.add_argument('--aws_ssn_instance_size', type=str, required=True, default='t2.large', + help='The SSN instance shape') + aws_required_args.add_argument('--aws_account_id', type=str, required=True, help='The ID of Amazon account') + + # --------azure subcommand ------------------------- + azure_parser = subparsers.add_parser('azure') + azure_parser.add_argument('--azure_vpc_name', type=str, help='Azure VPC Name') + azure_parser.add_argument('--azure_subnet_name', type=str, help='Azure Subnet Name') + azure_parser.add_argument('--azure_security_group_name', type=str, help='One Security group name for SSN') + azure_parser.add_argument('--azure_edge_security_group_name', type=str, + help='One Security group name for Edge node') + azure_parser.add_argument('--azure_resource_group_name', type=str, help='Name of Resource group in Azure') + azure_parser.add_argument('--azure_datalake_enable', type=str, default='false', choices=BOOL_CHOICES_LIST, + help='Provision DataLake storage account. 
(valid choices: %s)' % BOOL_CHOICES_LIST) + azure_parser.add_argument('--azure_ad_group_id', type=str, help='ID of Azure AD group') + azure_parser.add_argument('--azure_offer_number', type=str, help='Azure offer number') + azure_parser.add_argument('--azure_currency', type=str, help='Azure currency code') + azure_parser.add_argument('--azure_locale', type=str, help='Azure locale') + azure_parser.add_argument('--azure_application_id', type=str, help='Azure login application ID') + azure_parser.add_argument('--azure_validate_permission_scope', type=str, default='true', + choices=BOOL_CHOICES_LIST, + help='Azure permission scope validation. (valid choices: %s)' % BOOL_CHOICES_LIST) + azure_parser.add_argument('--azure_oauth2_enabled', type=str, default='false', choices=BOOL_CHOICES_LIST, + help='Using OAuth2 for logging in DataLab. (valid choices: %s)' % BOOL_CHOICES_LIST) + azure_parser.add_argument('--azure_region_info', type=str, help='Azure region info') + azure_parser.add_argument('--azure_source_vpc_name', type=str, help='Azure VPC source Name') + azure_parser.add_argument('--azure_source_resource_group_name', type=str, help='Azure source resource group') + + azure_required_args = azure_parser.add_argument_group('Required arguments') + azure_required_args.add_argument('--azure_region', type=str, required=True, help='Azure region') + azure_required_args.add_argument('--azure_ssn_instance_size', type=str, default='Standard_DS2_v2', required=True, + help='The SSN instance shape') + azure_required_args.add_argument('--azure_auth_path', type=str, required=True, + help='Full path to Azure credentials JSON file') + + # --------gcp subcommand ----------------------------- + gcp_parser = subparsers.add_parser('gcp') + gcp_parser.add_argument('--billing_dataset_name', type=str, + help='Name of GCP dataset (BigQuery service) for billing') + gcp_parser.add_argument('--gcp_subnet_name', type=str, help='GCP Subnet Name') + gcp_parser.add_argument('--gcp_vpc_name', type=str, 
help='GCP VPC Name') + gcp_parser.add_argument('--gcp_firewall_name', type=str, + help='One or more comma-separated GCP Firewall rules for SSN') + + gcp_required_args = gcp_parser.add_argument_group('Required arguments') + gcp_required_args.add_argument('--gcp_region', type=str, required=True, help='GCP region') + gcp_required_args.add_argument('--gcp_zone', type=str, required=True, help='GCP zone') + gcp_required_args.add_argument('--gcp_ssn_instance_size', type=str, required=True, default='n1-standard-2', + help='The SSN instance shape') + gcp_required_args.add_argument('--gcp_project_id', type=str, required=True, + help='The project ID in Google Cloud Platform') + gcp_required_args.add_argument('--gcp_service_account_path', type=str, required=True, + help='Full path to GCP service account JSON file') + return parser def generate_docker_command(): @@ -207,11 +281,15 @@ def generate_docker_command(): def build_docker_images(args): # Building base and ssn docker images - subprocess.run('cd {2}; sudo docker build --build-arg OS={0} --build-arg SRC_PATH="infrastructure-provisioning/src/" --file ' - 'infrastructure-provisioning/src/general/files/{1}/' - 'base_Dockerfile -t docker.datalab-base .'.format(args.conf_os_family, args.conf_cloud_provider, args.workspace_path), shell=True, check=True) - subprocess.run('cd {2}; sudo docker build --build-arg OS={0} --file infrastructure-provisioning/src/general/files/{1}/' - 'ssn_Dockerfile -t docker.datalab-ssn .'.format(args.conf_os_family, args.conf_cloud_provider, args.workspace_path), shell=True, check=True) + subprocess.run( + 'cd {2}; sudo docker build --build-arg OS={0} --build-arg SRC_PATH="infrastructure-provisioning/src/" --file ' + 'infrastructure-provisioning/src/general/files/{1}/' + 'base_Dockerfile -t docker.datalab-base .'.format(args.conf_os_family, args.conf_cloud_provider, + args.workspace_path), shell=True, check=True) + subprocess.run( + 'cd {2}; sudo docker build --build-arg OS={0} --file 
infrastructure-provisioning/src/general/files/{1}/' + 'ssn_Dockerfile -t docker.datalab-ssn .'.format(args.conf_os_family, args.conf_cloud_provider, + args.workspace_path), shell=True, check=True) def deploy_datalab(args): @@ -227,6 +305,13 @@ def terminate_datalab(args): if __name__ == "__main__": + parser = build_parser() + args = parser.parse_args() + + if args.aws_secret_access_key or args.aws_access_key: + if not (args.aws_secret_access_key and args.aws_access_key): + sys.exit('Please provide both arguments: --aws_secret_access_key and --aws_access_key') + if not args.workspace_path: print("Workspace path isn't set, using current directory: {}".format(os.environ['PWD'])) args.workspace_path = os.environ['PWD'] diff --git a/infrastructure-provisioning/src/base/scripts/install_prerequisites.py b/infrastructure-provisioning/src/base/scripts/install_prerequisites.py index 7b747b2..b75ae24 100644 --- a/infrastructure-provisioning/src/base/scripts/install_prerequisites.py +++ b/infrastructure-provisioning/src/base/scripts/install_prerequisites.py @@ -43,25 +43,12 @@ parser.add_argument('--region', type=str, default='') args = parser.parse_args() -def create_china_pip_conf_file(conn): - if not exists(conn,'/home/{}/pip_china_ensured'.format(args.user)): - conn.sudo('touch /etc/pip.conf') - conn.sudo('echo "[global]" >> /etc/pip.conf') - conn.sudo('echo "timeout = 600" >> /etc/pip.conf') - conn.sudo('echo "index-url = https://{}/simple/" >> /etc/pip.conf'.format(os.environ['conf_pypi_mirror'])) - conn.sudo('echo "trusted-host = {}" >> /etc/pip.conf'.format(os.environ['conf_pypi_mirror'])) - conn.sudo('touch /home/{}/pip_china_ensured'.format(args.user)) - if __name__ == "__main__": print("Configure connections") global conn conn = init_datalab_connection(args.hostname, args.user, args.keyfile) deeper_config = json.loads(args.additional_config) - if args.region == 'cn-north-1': - change_pkg_repos() - create_china_pip_conf_file() - print("Updating hosts file") 
update_hosts_file(args.user) diff --git a/infrastructure-provisioning/src/general/conf/datalab.ini b/infrastructure-provisioning/src/general/conf/datalab.ini index f9d2d1b..4881a03 100644 --- a/infrastructure-provisioning/src/general/conf/datalab.ini +++ b/infrastructure-provisioning/src/general/conf/datalab.ini @@ -40,8 +40,6 @@ key_dir = /root/keys/ lifecycle_stage = dev ### The name of user for tag, which will be set for all resources # tag_resource_id = user:tag -### Pypi mirror for China -pypi_mirror = pypi.doubanio.com ### Name of own GitLab SSL certificate gitlab_certfile = datalab-gitlab.crt ### Enable or Disable creating image at first time @@ -262,8 +260,6 @@ scala_version = 2.12.8 livy_version = 0.3.0 ### If it is true, Livy will be used on Zeppelin notebook multiple_clusters = false -### R China mirror -r_mirror = http://mirror.lzu.edu.cn/CRAN/ ### NVidia driver version for Tensor/DeepLearning notebooks nvidia_version = 418.126.02 ### Caffe library version for DeepLearning notebook diff --git a/infrastructure-provisioning/src/general/files/aws/base_Dockerfile b/infrastructure-provisioning/src/general/files/aws/base_Dockerfile index 9cad9c4..70870b8 100644 --- a/infrastructure-provisioning/src/general/files/aws/base_Dockerfile +++ b/infrastructure-provisioning/src/general/files/aws/base_Dockerfile @@ -41,7 +41,7 @@ ENV LANGUAGE en_US:en ENV LC_ALL en_US.UTF-8 # Install any python dependencies -RUN python3 -m pip install -UI qtconsole==4.7.7 pip==21.0.1 && \ +RUN python3 -m pip install -UI qtconsole==5.1.1 pip==21.1.2 && \ python3 -m pip install boto3 backoff patchwork fabric fabvenv awscli argparse requests ujson jupyter pycryptodome # Configuring ssh for user @@ -79,7 +79,6 @@ COPY ${SRC_PATH}general/scripts/aws/common_* /root/scripts/ COPY ${SRC_PATH}general/lib/aws/* /usr/lib/python3.8/datalab/ COPY ${SRC_PATH}general/lib/os/${OS}/common_lib.py /usr/lib/python3.8/datalab/common_lib.py COPY ${SRC_PATH}general/lib/os/fab.py 
/usr/lib/python3.8/datalab/fab.py -COPY ${SRC_PATH}general/files/os/${OS}/sources.list /root/files/ COPY ${SRC_PATH}general/files/os/ivysettings.xml /root/templates/ COPY ${SRC_PATH}general/files/os/local_endpoint.json /root/files/ COPY ${SRC_PATH}project/templates/locations/ /root/locations/ diff --git a/infrastructure-provisioning/src/general/files/azure/base_Dockerfile b/infrastructure-provisioning/src/general/files/azure/base_Dockerfile index 3d608de..2070c05 100644 --- a/infrastructure-provisioning/src/general/files/azure/base_Dockerfile +++ b/infrastructure-provisioning/src/general/files/azure/base_Dockerfile @@ -30,7 +30,7 @@ RUN apt-get update && \ apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* # Install any python dependencies -RUN python3 -m pip install -UI qtconsole==4.7.7 pip==21.0.1 && \ +RUN python3 -m pip install -UI qtconsole==5.1.1 pip==21.1.2 && \ python3 -m pip install backoff patchwork fabric fabvenv argparse requests ujson jupyter pycryptodome azure==2.0.0 azure-mgmt-authorization pyyaml # Configuring ssh for user @@ -68,7 +68,6 @@ COPY ${SRC_PATH}general/scripts/azure/common_* /root/scripts/ COPY ${SRC_PATH}general/lib/azure/* /usr/lib/python3.8/datalab/ COPY ${SRC_PATH}general/lib/os/${OS}/common_lib.py /usr/lib/python3.8/datalab/common_lib.py COPY ${SRC_PATH}general/lib/os/fab.py /usr/lib/python3.8/datalab/fab.py -COPY ${SRC_PATH}general/files/os/${OS}/sources.list /root/files/ COPY ${SRC_PATH}general/files/os/ivysettings.xml /root/templates/ COPY ${SRC_PATH}general/files/os/local_endpoint.json /root/files/ COPY ${SRC_PATH}project/templates/locations/ /root/locations/ diff --git a/infrastructure-provisioning/src/general/files/gcp/base_Dockerfile b/infrastructure-provisioning/src/general/files/gcp/base_Dockerfile index 2059bce..bb4a81c 100644 --- a/infrastructure-provisioning/src/general/files/gcp/base_Dockerfile +++ b/infrastructure-provisioning/src/general/files/gcp/base_Dockerfile @@ -41,7 +41,7 @@ ENV LANGUAGE en_US:en ENV 
LC_ALL en_US.UTF-8 # Install any python dependencies -RUN python3 -m pip install -UI pip==21.0.1 && \ +RUN python3 -m pip install -UI pip==21.1.2 && \ python3 -m pip install -U six patchwork configparser boto3 backoff fabric fabvenv argparse ujson jupyter pycryptodome google-api-python-client google-cloud-storage \ pyyaml google-auth-httplib2 oauth2client @@ -79,7 +79,6 @@ COPY ${SRC_PATH}general/scripts/gcp/common_* /root/scripts/ COPY ${SRC_PATH}general/lib/gcp/* /usr/lib/python3.8/datalab/ COPY ${SRC_PATH}general/lib/os/${OS}/common_lib.py /usr/lib/python3.8/datalab/common_lib.py COPY ${SRC_PATH}general/lib/os/fab.py /usr/lib/python3.8/datalab/fab.py -COPY ${SRC_PATH}general/files/os/${OS}/sources.list /root/files/ COPY ${SRC_PATH}general/files/os/ivysettings.xml /root/templates/ COPY ${SRC_PATH}general/files/os/local_endpoint.json /root/files/ COPY ${SRC_PATH}project/templates/locations/ /root/locations/ diff --git a/infrastructure-provisioning/src/general/files/os/debian/sources.list b/infrastructure-provisioning/src/general/files/os/debian/sources.list deleted file mode 100644 index 3b3e80a..0000000 --- a/infrastructure-provisioning/src/general/files/os/debian/sources.list +++ /dev/null @@ -1,56 +0,0 @@ -# ***************************************************************************** -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# ****************************************************************************** - -# See http://help.ubuntu.com/community/UpgradeNotes for how to upgrade to -# newer versions of the distribution. - -deb http://mirrors.aliyun.com/ubuntu/ xenial main restricted -deb-src http://mirrors.aliyun.com/ubuntu/ xenial main restricted - -## Major bug fix updates produced after the final release of the -## distribution. -deb http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted -deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates main restricted - -## Uncomment the following two lines to add software from the 'universe' -## repository. -## N.B. software from this repository is ENTIRELY UNSUPPORTED by the Ubuntu -## team. Also, please note that software in universe WILL NOT receive any -## review or updates from the Ubuntu security team. -deb http://mirrors.aliyun.com/ubuntu/ xenial universe -deb-src http://mirrors.aliyun.com/ubuntu/ xenial universe -deb http://mirrors.aliyun.com/ubuntu/ xenial-updates universe -deb-src http://mirrors.aliyun.com/ubuntu/ xenial-updates universe - -## N.B. software from this repository may not have been tested as -## extensively as that contained in the main release, although it includes -## newer versions of some applications which may provide useful features. -## Also, please note that software in backports WILL NOT receive any review -## or updates from the Ubuntu security team. 
-# deb http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted -# deb-src http://mirrors.aliyun.com/ubuntu/ xenial-backports main restricted - -deb http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted -deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security main restricted -deb http://mirrors.aliyun.com/ubuntu/ xenial-security universe -deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security universe -# deb http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse -# deb-src http://mirrors.aliyun.com/ubuntu/ xenial-security multiverse \ No newline at end of file diff --git a/infrastructure-provisioning/src/general/files/os/redhat/sources.list b/infrastructure-provisioning/src/general/files/os/redhat/sources.list deleted file mode 100644 index fe5549c..0000000 --- a/infrastructure-provisioning/src/general/files/os/redhat/sources.list +++ /dev/null @@ -1,83 +0,0 @@ -# ***************************************************************************** -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# -# ****************************************************************************** - -# CentOS-Base.repo -# -# The mirror system uses the connecting IP address of the client and the -# update status of each mirror to pick mirrors that are updated to and -# geographically close to the client. You should use this for CentOS updates -# unless you are manually picking other mirrors. -# -# If the mirrorlist= does not work for you, as a fall back you can try the -# remarked out baseurl= line instead. -# -# - -[base] -name=CentOS-7 - Base - mirrors.aliyun.com -failovermethod=priority -baseurl=http://mirrors.aliyun.com/centos/7/os/$basearch/ - http://mirrors.aliyuncs.com/centos/7/os/$basearch/ -#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=os -gpgcheck=1 -gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7 - -#released updates -[updates] -name=CentOS-7 - Updates - mirrors.aliyun.com -failovermethod=priority -baseurl=http://mirrors.aliyun.com/centos/7/updates/$basearch/ - http://mirrors.aliyuncs.com/centos/7/updates/$basearch/ -#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=updates -gpgcheck=1 -gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7 - -#additional packages that may be useful -[extras] -name=CentOS-7 - Extras - mirrors.aliyun.com -failovermethod=priority -baseurl=http://mirrors.aliyun.com/centos/7/extras/$basearch/ - http://mirrors.aliyuncs.com/centos/7/extras/$basearch/ -#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=extras -gpgcheck=1 -gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7 - -#additional packages that extend functionality of existing packages -[centosplus] -name=CentOS-7 - Plus - mirrors.aliyun.com -failovermethod=priority -baseurl=http://mirrors.aliyun.com/centos/7/centosplus/$basearch/ - http://mirrors.aliyuncs.com/centos/7/centosplus/$basearch/ -#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=centosplus -gpgcheck=1 
-enabled=0 -gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7 - -#contrib - packages by Centos Users -[contrib] -name=CentOS-7 - Contrib - mirrors.aliyun.com -failovermethod=priority -baseurl=http://mirrors.aliyun.com/centos/7/contrib/$basearch/ - http://mirrors.aliyuncs.com/centos/7/contrib/$basearch/ -#mirrorlist=http://mirrorlist.centos.org/?release=7&arch=$basearch&repo=contrib -gpgcheck=1 -enabled=0 -gpgkey=http://mirrors.aliyun.com/centos/RPM-GPG-KEY-CentOS-7 \ No newline at end of file diff --git a/infrastructure-provisioning/src/general/lib/os/debian/common_lib.py b/infrastructure-provisioning/src/general/lib/os/debian/common_lib.py index 29b504b..e62d301 100644 --- a/infrastructure-provisioning/src/general/lib/os/debian/common_lib.py +++ b/infrastructure-provisioning/src/general/lib/os/debian/common_lib.py @@ -156,12 +156,7 @@ def renew_gpg_key(): sys.exit(1) -def change_pkg_repos(): - if not exists(datalab.fab.conn,'/tmp/pkg_china_ensured'): - datalab.fab.conn.put('/root/files/sources.list', '/tmp/sources.list') - datalab.fab.conn.sudo('mv /tmp/sources.list /etc/apt/sources.list') - manage_pkg('update', 'remote', '') - datalab.fab.conn.sudo('touch /tmp/pkg_china_ensured') + def find_java_path_remote(): diff --git a/infrastructure-provisioning/src/general/lib/os/redhat/common_lib.py b/infrastructure-provisioning/src/general/lib/os/redhat/common_lib.py index 6b432fd..15d51c5 100644 --- a/infrastructure-provisioning/src/general/lib/os/redhat/common_lib.py +++ b/infrastructure-provisioning/src/general/lib/os/redhat/common_lib.py @@ -89,13 +89,6 @@ def ensure_pkg(user, requisites='git vim gcc python-devel openssl-devel nmap lib sys.exit(1) -def change_pkg_repos(): - if not exists(datalab.fab.conn,'/tmp/pkg_china_ensured'): - datalab.fab.conn.put('/root/files/sources.list', '/tmp/sources.list') - datalab.fab.conn.sudo('mv /tmp/sources.list /etc/yum.repos.d/CentOS-Base-aliyun.repo') - datalab.fab.conn.sudo('touch /tmp/pkg_china_ensured') - - def 
find_java_path_remote(): java_path = datalab.fab.conn.sudo("alternatives --display java | grep 'slave jre: ' | awk '{print $3}'").stdout.replace('\n','') return java_path diff --git a/infrastructure-provisioning/src/general/scripts/aws/common_notebook_configure_dataengine-service.py b/infrastructure-provisioning/src/general/scripts/aws/common_notebook_configure_dataengine-service.py index 4d542b4..b265226 100644 --- a/infrastructure-provisioning/src/general/scripts/aws/common_notebook_configure_dataengine-service.py +++ b/infrastructure-provisioning/src/general/scripts/aws/common_notebook_configure_dataengine-service.py @@ -87,12 +87,12 @@ if __name__ == "__main__": print('[INSTALLING KERNELS INTO SPECIFIED NOTEBOOK]') params = "--bucket {} --cluster_name {} --emr_version {} --keyfile {} --notebook_ip {} --region {} " \ "--emr_excluded_spark_properties {} --project_name {} --os_user {} --edge_hostname {} " \ - "--proxy_port {} --scala_version {} --application {} --pip_mirror {}" \ + "--proxy_port {} --scala_version {} --application {}" \ .format(notebook_config['bucket_name'], notebook_config['cluster_name'], os.environ['emr_version'], notebook_config['key_path'], notebook_config['notebook_ip'], os.environ['aws_region'], os.environ['emr_excluded_spark_properties'], os.environ['project_name'], os.environ['conf_os_user'], edge_instance_hostname, '3128', os.environ['notebook_scala_version'], - os.environ['application'], os.environ['conf_pypi_mirror']) + os.environ['application']) try: subprocess.run("~/scripts/{}_{}.py {}".format(application, 'install_dataengine-service_kernels', params), shell=True, check=True) datalab.actions_lib.remove_emr_tag(notebook_config['cluster_id'], ['State']) diff --git a/infrastructure-provisioning/src/general/scripts/aws/ssn_prepare.py b/infrastructure-provisioning/src/general/scripts/aws/ssn_prepare.py index bee2e3f..6ce0672 100644 --- a/infrastructure-provisioning/src/general/scripts/aws/ssn_prepare.py +++ 
b/infrastructure-provisioning/src/general/scripts/aws/ssn_prepare.py @@ -37,7 +37,8 @@ if __name__ == "__main__": local_log_filepath = "/logs/" + os.environ['conf_resource'] + "/" + local_log_filename logging.basicConfig(format='%(levelname)-8s [%(asctime)s] %(message)s', level=logging.DEBUG, - filename=local_log_filepath) + filename=local_log_filepath, + handlers=[logging.StreamHandler()]) ssn_conf = dict() ssn_conf['instance'] = 'ssn' ssn_conf['pre_defined_vpc'] = False @@ -59,7 +60,6 @@ if __name__ == "__main__": try: logging.info('[DERIVING NAMES]') - print('[DERIVING NAMES]') ssn_conf['service_base_name'] = os.environ['conf_service_base_name'] = datalab.fab.replace_multi_symbols( os.environ['conf_service_base_name'][:20], '-', True) ssn_conf['role_name'] = '{}-ssn-role'.format(ssn_conf['service_base_name']) @@ -101,7 +101,6 @@ if __name__ == "__main__": try: ssn_conf['pre_defined_vpc'] = True logging.info('[CREATE VPC AND ROUTE TABLE]') - print('[CREATE VPC AND ROUTE TABLE]') params = "--vpc {} --region {} --infra_tag_name {} --infra_tag_value {} --vpc_name {}".format( ssn_conf['vpc_cidr'], ssn_conf['region'], ssn_conf['tag_name'], ssn_conf['service_base_name'], ssn_conf['vpc_name']) @@ -127,7 +126,6 @@ if __name__ == "__main__": try: ssn_conf['pre_defined_vpc2'] = True logging.info('[CREATE SECONDARY VPC AND ROUTE TABLE]') - print('[CREATE SECONDARY VPC AND ROUTE TABLE]') params = "--vpc {} --region {} --infra_tag_name {} --infra_tag_value {} --secondary " \ "--vpc_name {}".format(ssn_conf['vpc2_cidr'], ssn_conf['region'], ssn_conf['tag2_name'], ssn_conf['service_base_name'], ssn_conf['vpc2_name']) @@ -154,7 +152,6 @@ if __name__ == "__main__": try: ssn_conf['pre_defined_subnet'] = True logging.info('[CREATE SUBNET]') - print('[CREATE SUBNET]') params = "--vpc_id {0} --username {1} --infra_tag_name {2} --infra_tag_value {3} --prefix {4} " \ "--ssn {5} --zone {6} --subnet_name {7}".format( os.environ['aws_vpc_id'], 'ssn', 
ssn_conf['tag_name'],ssn_conf['service_base_name'], '20', @@ -193,7 +190,6 @@ if __name__ == "__main__": except KeyError: try: logging.info('[CREATE PEERING CONNECTION]') - print('[CREATE PEERING CONNECTION]') os.environ['aws_peering_id'] = datalab.actions_lib.create_peering_connection( os.environ['aws_vpc_id'], os.environ['aws_vpc2_id'], ssn_conf['service_base_name']) print('PEERING CONNECTION ID:' + os.environ['aws_peering_id']) @@ -226,7 +222,6 @@ if __name__ == "__main__": try: ssn_conf['pre_defined_sg'] = True logging.info('[CREATE SG FOR SSN]') - print('[CREATE SG FOR SSN]') ssn_conf['ingress_sg_rules_template'] = datalab.meta_lib.format_sg([ { "PrefixListIds": [], @@ -301,7 +296,6 @@ if __name__ == "__main__": try: logging.info('[CREATE ROLES]') - print('[CREATE ROLES]') params = "--role_name {} --role_profile_name {} --policy_name {} --policy_file_name {} --region {} " \ "--infra_tag_name {} --infra_tag_value {} --user_tag_value {}".\ format(ssn_conf['role_name'], ssn_conf['role_profile_name'], ssn_conf['policy_name'], @@ -335,7 +329,6 @@ if __name__ == "__main__": try: logging.info('[CREATE ENDPOINT AND ROUTE-TABLE]') - print('[CREATE ENDPOINT AND ROUTE-TABLE]') params = "--vpc_id {} --region {} --infra_tag_name {} --infra_tag_value {}".format( os.environ['aws_vpc_id'], os.environ['aws_region'], ssn_conf['tag_name'], ssn_conf['service_base_name']) try: @@ -368,7 +361,6 @@ if __name__ == "__main__": if os.environ['conf_duo_vpc_enable'] == 'true': try: logging.info('[CREATE ENDPOINT AND ROUTE-TABLE FOR NOTEBOOK VPC]') - print('[CREATE ENDPOINT AND ROUTE-TABLE FOR NOTEBOOK VPC]') params = "--vpc_id {} --region {} --infra_tag_name {} --infra_tag_value {}".format( os.environ['aws_vpc2_id'], os.environ['aws_region'], ssn_conf['tag2_name'], ssn_conf['service_base_name']) @@ -401,7 +393,6 @@ if __name__ == "__main__": try: logging.info('[CREATE SSN INSTANCE]') - print('[CREATE SSN INSTANCE]') params = "--node_name {0} --ami_id {1} --instance_type {2} --key_name 
{3} --security_group_ids {4} " \ "--subnet_id {5} --iam_profile {6} --infra_tag_name {7} --infra_tag_value {8} --instance_class {9} " \ "--primary_disk_size {10}".\ @@ -441,7 +432,6 @@ if __name__ == "__main__": if ssn_conf['network_type'] == 'public': try: logging.info('[ASSOCIATING ELASTIC IP]') - print('[ASSOCIATING ELASTIC IP]') ssn_conf['ssn_id'] = datalab.meta_lib.get_instance_by_name(ssn_conf['tag_name'], ssn_conf['instance_name']) try: ssn_conf['elastic_ip'] = os.environ['ssn_elastic_ip'] @@ -489,7 +479,6 @@ if __name__ == "__main__": if 'ssn_hosted_zone_id' in os.environ and 'ssn_hosted_zone_name' in os.environ and 'ssn_subdomain' in os.environ: try: logging.info('[CREATING ROUTE53 RECORD]') - print('[CREATING ROUTE53 RECORD]') try: datalab.actions_lib.create_route_53_record(os.environ['ssn_hosted_zone_id'], os.environ['ssn_hosted_zone_name'], diff --git a/infrastructure-provisioning/src/general/scripts/gcp/common_notebook_configure_dataengine-service.py b/infrastructure-provisioning/src/general/scripts/gcp/common_notebook_configure_dataengine-service.py index a79a4c4..7273709 100644 --- a/infrastructure-provisioning/src/general/scripts/gcp/common_notebook_configure_dataengine-service.py +++ b/infrastructure-provisioning/src/general/scripts/gcp/common_notebook_configure_dataengine-service.py @@ -100,12 +100,11 @@ if __name__ == "__main__": print('[INSTALLING KERNELS INTO SPECIFIED NOTEBOOK]') params = "--bucket {} --cluster_name {} --dataproc_version {} --keyfile {} --notebook_ip {} --region {} " \ "--edge_user_name {} --project_name {} --os_user {} --edge_hostname {} --proxy_port {} " \ - "--scala_version {} --application {} --pip_mirror {}" \ + "--scala_version {} --application {}" \ .format(notebook_config['bucket_name'], notebook_config['cluster_name'], os.environ['dataproc_version'], notebook_config['key_path'], notebook_config['notebook_ip'], os.environ['gcp_region'], notebook_config['edge_user_name'], notebook_config['project_name'], 
os.environ['conf_os_user'], - edge_instance_hostname, '3128', os.environ['notebook_scala_version'], os.environ['application'], - os.environ['conf_pypi_mirror']) + edge_instance_hostname, '3128', os.environ['notebook_scala_version'], os.environ['application']) try: subprocess.run("~/scripts/{}_{}.py {}".format(application, 'install_dataengine-service_kernels', params), shell=True, check=True) GCPActions.update_dataproc_cluster(notebook_config['cluster_name'], notebook_config['cluster_labels']) diff --git a/infrastructure-provisioning/src/ssn/fabfile.py b/infrastructure-provisioning/src/ssn/fabfile.py index 79243cd..1c107b0 100644 --- a/infrastructure-provisioning/src/ssn/fabfile.py +++ b/infrastructure-provisioning/src/ssn/fabfile.py @@ -26,8 +26,6 @@ import os import sys import traceback import uuid -from datalab.fab import * -from fabric import * @task def run(ctx): @@ -39,14 +37,16 @@ def run(ctx): ssn_config = dict() ssn_config['ssn_unique_index'] = str(uuid.uuid4())[:5] try: - subprocess.run("~/scripts/{}.py --ssn_unique_index {}".format('ssn_prepare', ssn_config['ssn_unique_index']), shell=True, check=True) + subprocess.run("~/scripts/{}.py --ssn_unique_index {}".format('ssn_prepare', ssn_config['ssn_unique_index']), + shell=True, check=True) except Exception as err: traceback.print_exc() append_result("Failed preparing SSN node.", str(err)) sys.exit(1) try: - subprocess.run("~/scripts/{}.py --ssn_unique_index {}".format('ssn_configure', ssn_config['ssn_unique_index']), shell=True, check=True) + subprocess.run("~/scripts/{}.py --ssn_unique_index {}".format('ssn_configure', ssn_config['ssn_unique_index']), + shell=True, check=True) except Exception as err: traceback.print_exc() append_result("Failed configuring SSN node.", str(err)) diff --git a/infrastructure-provisioning/src/ssn/scripts/configure_docker.py b/infrastructure-provisioning/src/ssn/scripts/configure_docker.py index aa20a68..8c0e4cb 100644 --- 
a/infrastructure-provisioning/src/ssn/scripts/configure_docker.py +++ b/infrastructure-provisioning/src/ssn/scripts/configure_docker.py @@ -73,11 +73,6 @@ def download_toree(): sys.exit(1) -def add_china_repository(datalab_path): - conn.sudo('''bash -c 'cd {1}sources/infrastructure-provisioning/src/base/ && sed -i "/pip install/s/$/ -i https\:\/\/{0}\/simple --trusted-host {0} --timeout 60000/g" Dockerfile' '''.format(os.environ['conf_pypi_mirror'], datalab_path)) - conn.sudo('''bash -c 'cd {}sources/infrastructure-provisioning/src/base/ && sed -i "/pip install/s/jupyter/ipython==5.0.0 jupyter==1.0.0/g" Dockerfile' '''.format(datalab_path)) - conn.sudo('''bash -c 'cd {}sources/infrastructure-provisioning/src/base/ && sed -i "22i COPY general/files/os/debian/sources.list /etc/apt/sources.list" Dockerfile' '''.format(datalab_path)) - def login_in_gcr(os_user, gcr_creds, odahu_image, datalab_path, cloud_provider): if gcr_creds != '': try: @@ -119,8 +114,6 @@ def build_docker_images(image_list, region, datalab_path): 'azure_auth.json'.format(args.keyfile, host_string, args.datalab_path)) conn.sudo('cp {0}sources/infrastructure-provisioning/src/base/azure_auth.json ' '/home/{1}/keys/azure_auth.json'.format(args.datalab_path, args.os_user)) - if region == 'cn-north-1': - add_china_repository(datalab_path) for image in image_list: name = image['name'] tag = image['tag'] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
