This is an automated email from the ASF dual-hosted git repository.
lfrolov pushed a commit to branch develop
in repository https://gitbox.apache.org/repos/asf/incubator-datalab.git
The following commit(s) were added to refs/heads/develop by this push:
new 22fea3505 [DATALAB-2997]: Added Azure HDInsight integration with RStudio
new 0f9832579 Merge pull request #1743 from OleksandrRepnikov/DATALAB-2997
22fea3505 is described below
commit 22fea3505733c1a0d17d767c07804756835ab445
Author: orepnikov <[email protected]>
AuthorDate: Mon Sep 26 18:39:46 2022 +0300
[DATALAB-2997]: Added Azure HDInsight integration with RStudio
---
.../src/general/lib/azure/actions_lib.py | 327 ++++++++++++++-------
.../scripts/azure/dataengine-service_configure.py | 34 ++-
.../rstudio_install_dataengine-service_kernels.py | 26 ++
3 files changed, 270 insertions(+), 117 deletions(-)
diff --git a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
index 1e3c23275..715652826 100644
--- a/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
+++ b/infrastructure-provisioning/src/general/lib/azure/actions_lib.py
@@ -117,7 +117,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create Resource Group: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create Resource Group: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create Resource Group",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -134,7 +135,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to remove Resource Group: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to remove Resource Group: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to remove Resource Group",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -156,15 +158,15 @@ class AzureActions:
},
"subnets": [
{
- "name": os.environ['azure_subnet_name'],
- "service_endpoints": [
- {
- "service": "Microsoft.Storage",
- "locations": [
- region
- ]
- }
- ]
+ "name": os.environ['azure_subnet_name'],
+ "service_endpoints": [
+ {
+ "service": "Microsoft.Storage",
+ "locations": [
+ region
+ ]
+ }
+ ]
}
]
}
@@ -172,7 +174,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create Virtual Network: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create Virtual Network: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create Virtual Network",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -199,7 +202,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create Virtual Network peering: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create Virtual Network peering: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create Virtual Network
peering",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -214,7 +218,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to remove Virtual Network: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to remove Virtual Network: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to remove Virtual Network",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -231,10 +236,10 @@ class AzureActions:
"address_prefix": subnet_cidr,
"service_endpoints": [
{
- "service": "Microsoft.Storage",
- "locations": [
- region
- ]
+ "service": "Microsoft.Storage",
+ "locations": [
+ region
+ ]
}
]
}
@@ -264,7 +269,8 @@ class AzureActions:
file=sys.stdout)}))
traceback.print_exc(file=sys.stdout)
- def create_security_group(self, resource_group_name, network_security_group_name, region, tags, list_rules, preexisting_sg = False):
+ def create_security_group(self, resource_group_name, network_security_group_name, region, tags, list_rules,
+ preexisting_sg=False):
try:
result = ''
if not preexisting_sg:
@@ -287,7 +293,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create security group: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create security group: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create security group",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -296,9 +303,9 @@ class AzureActions:
def remove_security_rules(self, network_security_group, resource_group,
security_rule):
try:
result = self.network_client.security_rules.begin_delete(
- network_security_group_name = network_security_group,
- resource_group_name = resource_group,
- security_rule_name = security_rule).wait()
+ network_security_group_name=network_security_group,
+ resource_group_name=resource_group,
+ security_rule_name=security_rule).wait()
return result
except Exception as err:
logging.info(
@@ -317,7 +324,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to remove security group: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to remove security group: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to remove security group",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -340,7 +348,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create Data Lake store: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create Data Lake store: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create Data Lake store",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -355,7 +364,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to remove Data Lake store: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to remove Data Lake store: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to remove Data Lake store",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -368,7 +378,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create Data Lake directory: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create Data Lake directory: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create Data Lake directory",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -415,7 +426,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to set user permission to Data Lake directory: " +
str(err) + "\n Traceback: " + traceback.print_exc(
+ "Unable to set user permission to Data Lake directory: " + str(
+ err) + "\n Traceback: " + traceback.print_exc(
file=sys.stdout))
append_result(str({"error": "Unable to set user permission to Data
Lake directory",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
@@ -430,7 +442,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to unset user permission to Data Lake directory: " +
str(err) + "\n Traceback: " + traceback.print_exc(
+ "Unable to unset user permission to Data Lake directory: " +
str(
+ err) + "\n Traceback: " + traceback.print_exc(
file=sys.stdout))
append_result(str({"error": "Unable to unset user permission to
Data Lake directory",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
@@ -444,7 +457,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to delete Data Lake directory: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to delete Data Lake directory: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to delete Data Lake directory",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -497,7 +511,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create Storage account: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create Storage account: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create Storage account",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -513,7 +528,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to remove Storage account: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to remove Storage account: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to remove Storage account",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -532,7 +548,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create blob container: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create blob container: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create blob container",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -540,13 +557,15 @@ class AzureActions:
def upload_to_container(self, resource_group_name, account_name, container_name, files):
try:
- block_blob_service = BlobServiceClient(account_url="https://" + account_name + ".blob.core.windows.net/", credential=self.credential)
+ block_blob_service = BlobServiceClient(account_url="https://" + account_name + ".blob.core.windows.net/",
+ credential=self.credential)
for filename in files:
block_blob_service.create_blob_from_path(container_name, filename, filename)
return ''
except Exception as err:
logging.info(
- "Unable to upload files to container: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to upload files to container: " + str(err) + "\n Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to upload files to container",
"error_message": str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)}))
@@ -554,7 +573,8 @@ class AzureActions:
def download_from_container(self, resource_group_name, account_name, container_name, files):
try:
- block_blob_service = BlobServiceClient(account_url="https://" + account_name + ".blob.core.windows.net/", credential=self.credential)
+ block_blob_service = BlobServiceClient(account_url="https://" + account_name + ".blob.core.windows.net/",
+ credential=self.credential)
for filename in files:
block_blob_service.get_blob_to_path(container_name, filename, filename)
return ''
@@ -562,7 +582,8 @@ class AzureActions:
return ''
except Exception as err:
logging.info(
- "Unable to download files from container: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to download files from container: " + str(err) + "\n Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to download files from container",
"error_message": str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)}))
@@ -586,7 +607,8 @@ class AzureActions:
return datalab.meta_lib.AzureMeta().get_static_ip(resource_group_name, ip_name).ip_address
except Exception as err:
logging.info(
- "Unable to create static IP address: " + str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create static IP address: " + str(err) + "\n Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create static IP address",
"error_message": str(err) + "\n Traceback: " + traceback.print_exc(file=sys.stdout)}))
@@ -604,7 +626,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to deny network access for disk: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to deny network access for disk: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to deny network access for
disk",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -619,7 +642,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to delete static IP address: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to delete static IP address: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to delete static IP address",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -956,7 +980,7 @@ class AzureActions:
instance_parameters = self.compute_client.virtual_machines.get(resource_group_name, instance_name)
instance_parameters.tags = tags
result = self.compute_client.virtual_machines.begin_create_or_update(resource_group_name, instance_name,
- instance_parameters)
+ instance_parameters)
return result
except Exception as err:
logging.info(
@@ -982,7 +1006,7 @@ class AzureActions:
# Removing public static IP address and network interfaces
network_interface_name = instance_name + '-nif'
for j in datalab.meta_lib.AzureMeta().get_network_interface(resource_group_name,
- network_interface_name).ip_configurations:
+ network_interface_name).ip_configurations:
self.delete_network_if(resource_group_name, network_interface_name)
print("Network interface {} has been removed".format(network_interface_name))
if j.public_ip_address:
@@ -1016,10 +1040,13 @@ class AzureActions:
def create_network_if(self, resource_group_name, vpc_name, subnet_name, interface_name, region, security_group_name,
tags, public_ip_name="None"):
try:
- subnet_cidr = datalab.meta_lib.AzureMeta().get_subnet(resource_group_name, vpc_name, subnet_name).address_prefix.split('/')[0]
+ subnet_cidr = \
+ datalab.meta_lib.AzureMeta().get_subnet(resource_group_name, vpc_name, subnet_name).address_prefix.split(
+ '/')[0]
private_ip = datalab.meta_lib.AzureMeta().check_free_ip(resource_group_name, vpc_name, subnet_cidr)
subnet_id = datalab.meta_lib.AzureMeta().get_subnet(resource_group_name, vpc_name, subnet_name).id
- security_group_id = datalab.meta_lib.AzureMeta().get_security_group(resource_group_name, security_group_name).id
+ security_group_id = datalab.meta_lib.AzureMeta().get_security_group(resource_group_name,
+ security_group_name).id
if public_ip_name == "None":
ip_params = [{
"name": interface_name,
@@ -1063,7 +1090,8 @@ class AzureActions:
return network_interface_id
except Exception as err:
logging.info(
- "Unable to create network interface: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create network interface: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create network interface",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -1075,7 +1103,8 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to delete network interface: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to delete network interface: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to delete network interface",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
@@ -1091,9 +1120,10 @@ class AzureActions:
if os.environ['notebook_multiple_clusters'] == 'true':
try:
livy_port = conn.sudo("cat /opt/" + cluster_name +
- "/livy/conf/livy.conf | grep livy.server.port | tail -n 1 | awk '{printf $3}'").stdout.replace('\n','')
+ "/livy/conf/livy.conf | grep livy.server.port | tail -n 1 | awk '{printf $3}'").stdout.replace(
+ '\n', '')
process_number = conn.sudo("netstat -natp 2>/dev/null | grep ':" + livy_port +
- "' | awk '{print $7}' | sed 's|/.*||g'").stdout.replace('\n','')
+ "' | awk '{print $7}' | sed 's|/.*||g'").stdout.replace('\n', '')
conn.sudo('kill -9 ' + process_number)
conn.sudo('systemctl disable livy-server-' + livy_port)
except:
@@ -1215,13 +1245,13 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to create HDInsight Spark cluster: " + str(err) + "\n
Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to create HDInsight Spark cluster: " + str(err) + "\n
Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to create HDInsight Spark
cluster",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
traceback.print_exc(file=sys.stdout)
-
def terminate_hdinsight_cluster(self, resource_group_name, cluster_name):
try:
logging.info('Starting to terminate HDInsight cluster {}'.format(cluster_name))
@@ -1234,12 +1264,14 @@ class AzureActions:
return result
except Exception as err:
logging.info(
- "Unable to terminate HDInsight Spark cluster: " + str(err) +
"\n Traceback: " + traceback.print_exc(file=sys.stdout))
+ "Unable to terminate HDInsight Spark cluster: " + str(err) +
"\n Traceback: " + traceback.print_exc(
+ file=sys.stdout))
append_result(str({"error": "Unable to terminate HDInsight Spark
cluster",
"error_message": str(err) + "\n Traceback: " +
traceback.print_exc(
file=sys.stdout)}))
traceback.print_exc(file=sys.stdout)
+
def configure_zeppelin_hdinsight_interpreter(cluster_name, os_user, headnode_ip):
try:
# (self, emr_version, cluster_name, region, spark_dir, os_user, yarn_dir, bucket,
@@ -1381,10 +1413,13 @@ def configure_zeppelin_hdinsight_interpreter(cluster_name, os_user, headnode_ip)
traceback.print_exc(file=sys.stdout)
sys.exit(1)
+
def ensure_local_jars(os_user, jars_dir):
- if not exists(datalab.fab.conn,'/home/{}/.ensure_dir/local_jars_ensured'.format(os_user)):
+ if not exists(datalab.fab.conn, '/home/{}/.ensure_dir/local_jars_ensured'.format(os_user)):
try:
- hadoop_version = datalab.fab.conn.sudo("ls /opt/spark/jars/hadoop-common* | sed -n 's/.*\([0-9]\.[0-9]\.[0-9]\).*/\\1/p'").stdout.replace('\n','')
+ hadoop_version = datalab.fab.conn.sudo(
+ "ls /opt/spark/jars/hadoop-common* | sed -n 's/.*\([0-9]\.[0-9]\.[0-9]\).*/\\1/p'").stdout.replace('\n',
+ '')
print("Downloading local jars for Azure")
datalab.fab.conn.sudo('mkdir -p {}'.format(jars_dir))
if os.environ['azure_datalake_enable'] == 'false':
@@ -1424,7 +1459,8 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
spark_jars_paths = None
if exists(datalab.fab.conn, '/opt/spark/conf/spark-defaults.conf'):
try:
- spark_jars_paths = datalab.fab.conn.sudo('cat /opt/spark/conf/spark-defaults.conf | grep -e "^spark.jars " ').stdout
+ spark_jars_paths = datalab.fab.conn.sudo(
+ 'cat /opt/spark/conf/spark-defaults.conf | grep -e "^spark.jars " ').stdout
except:
spark_jars_paths = None
user_storage_account_tag = "{}-{}-{}-bucket".format(os.environ['conf_service_base_name'],
@@ -1432,7 +1468,8 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
os.environ['endpoint_name'].lower())
shared_storage_account_tag = '{0}-{1}-shared-bucket'.format(os.environ['conf_service_base_name'],
os.environ['endpoint_name'].lower())
- for storage_account in datalab.meta_lib.AzureMeta().list_storage_accounts(os.environ['azure_resource_group_name']):
+ for storage_account in datalab.meta_lib.AzureMeta().list_storage_accounts(
+ os.environ['azure_resource_group_name']):
if user_storage_account_tag == storage_account.tags["Name"]:
user_storage_account_name = storage_account.name
user_storage_account_key = datalab.meta_lib.AzureMeta().list_storage_keys(
@@ -1445,10 +1482,13 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
datalab.fab.conn.put(templates_dir + 'core-site-storage.xml', '/tmp/core-site.xml')
else:
datalab.fab.conn.put(templates_dir + 'core-site-datalake.xml', '/tmp/core-site.xml')
- datalab.fab.conn.sudo('sed -i "s|USER_STORAGE_ACCOUNT|{}|g" /tmp/core-site.xml'.format(user_storage_account_name))
- datalab.fab.conn.sudo('sed -i "s|SHARED_STORAGE_ACCOUNT|{}|g" /tmp/core-site.xml'.format(shared_storage_account_name))
+ datalab.fab.conn.sudo(
+ 'sed -i "s|USER_STORAGE_ACCOUNT|{}|g" /tmp/core-site.xml'.format(user_storage_account_name))
+ datalab.fab.conn.sudo(
+ 'sed -i "s|SHARED_STORAGE_ACCOUNT|{}|g" /tmp/core-site.xml'.format(shared_storage_account_name))
datalab.fab.conn.sudo('sed -i "s|USER_ACCOUNT_KEY|{}|g" /tmp/core-site.xml'.format(user_storage_account_key))
- datalab.fab.conn.sudo('sed -i "s|SHARED_ACCOUNT_KEY|{}|g" /tmp/core-site.xml'.format(shared_storage_account_key))
+ datalab.fab.conn.sudo(
+ 'sed -i "s|SHARED_ACCOUNT_KEY|{}|g" /tmp/core-site.xml'.format(shared_storage_account_key))
if os.environ['azure_datalake_enable'] == 'true':
client_id = os.environ['azure_application_id']
refresh_token = os.environ['azure_user_refresh_token']
@@ -1460,18 +1500,22 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
else:
datalab.fab.conn.sudo('rm -f /opt/hadoop/etc/hadoop/core-site.xml')
datalab.fab.conn.sudo('mv /tmp/core-site.xml /opt/hadoop/etc/hadoop/core-site.xml')
- datalab.fab.conn.put(templates_dir + 'notebook_spark-defaults_local.conf', '/tmp/notebook_spark-defaults_local.conf')
+ datalab.fab.conn.put(templates_dir + 'notebook_spark-defaults_local.conf',
+ '/tmp/notebook_spark-defaults_local.conf')
datalab.fab.conn.sudo("jar_list=`find {} -name '*.jar' | tr '\\n' ','` ; echo \"spark.jars $jar_list\" >> \
/tmp/notebook_spark-defaults_local.conf".format(jars_dir))
datalab.fab.conn.sudo('cp -f /tmp/notebook_spark-defaults_local.conf /opt/spark/conf/spark-defaults.conf')
if memory_type == 'driver':
spark_memory = datalab.fab.get_spark_memory()
datalab.fab.conn.sudo('sed -i "/spark.*.memory/d"
/opt/spark/conf/spark-defaults.conf')
- datalab.fab.conn.sudo('''bash -c 'echo "spark.{0}.memory {1}m" >>
/opt/spark/conf/spark-defaults.conf' '''.format(memory_type,
-
spark_memory))
- if not exists(datalab.fab.conn,'/opt/spark/conf/spark-env.sh'):
+ datalab.fab.conn.sudo(
+ '''bash -c 'echo "spark.{0}.memory {1}m" >>
/opt/spark/conf/spark-defaults.conf' '''.format(memory_type,
+
spark_memory))
+ if not exists(datalab.fab.conn, '/opt/spark/conf/spark-env.sh'):
datalab.fab.conn.sudo('mv /opt/spark/conf/spark-env.sh.template
/opt/spark/conf/spark-env.sh')
- java_home = datalab.fab.conn.run("update-alternatives --query java |
grep -o --color=never
\'/.*/java-8.*/jre\'").stdout.splitlines()[0].replace('\n','')
+ java_home = datalab.fab.conn.run(
+ "update-alternatives --query java | grep -o --color=never
\'/.*/java-8.*/jre\'").stdout.splitlines()[
+ 0].replace('\n', '')
datalab.fab.conn.sudo("echo 'export JAVA_HOME=\'{}\'' >>
/opt/spark/conf/spark-env.sh".format(java_home))
if 'spark_configurations' in os.environ:
datalab_header = datalab.fab.conn.sudo('cat /tmp/notebook_spark-defaults_local.conf | grep "^#"').stdout
@@ -1490,13 +1534,15 @@ def configure_local_spark(jars_dir, templates_dir, memory_type='driver'):
new_spark_defaults.append(property + ' ' + config['Properties'][property])
new_spark_defaults.append(param)
new_spark_defaults = set(new_spark_defaults)
- datalab.fab.conn.sudo('''bash -c 'echo "{}" >
/opt/spark/conf/spark-defaults.conf' '''.format(datalab_header))
+ datalab.fab.conn.sudo(
+ '''bash -c 'echo "{}" > /opt/spark/conf/spark-defaults.conf'
'''.format(datalab_header))
for prop in new_spark_defaults:
prop = prop.rstrip()
datalab.fab.conn.sudo('''bash -c 'echo "{}" >>
/opt/spark/conf/spark-defaults.conf' '''.format(prop))
datalab.fab.conn.sudo('sed -i "/^\s*$/d"
/opt/spark/conf/spark-defaults.conf')
if spark_jars_paths:
- datalab.fab.conn.sudo('''bash -c 'echo "{}" >>
/opt/spark/conf/spark-defaults.conf' '''.format(spark_jars_paths))
+ datalab.fab.conn.sudo(
+ '''bash -c 'echo "{}" >>
/opt/spark/conf/spark-defaults.conf' '''.format(spark_jars_paths))
except Exception as err:
print('Error:', str(err))
sys.exit(1)
@@ -1507,24 +1553,34 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena
/tmp/{1}/notebook_spark-defaults_local.conf".format(jars_dir, cluster_name), shell=True, check=True)
if os.path.exists('{0}spark/conf/spark-defaults.conf'.format(cluster_dir)):
additional_spark_properties = subprocess.run('diff --changed-group-format="%>" --unchanged-group-format="" '
- '/tmp/{0}/notebook_spark-defaults_local.conf '
- '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
- cluster_name, cluster_dir), capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip("\n\r")
+ '/tmp/{0}/notebook_spark-defaults_local.conf '
+ '{1}spark/conf/spark-defaults.conf | grep -v "^#"'.format(
+ cluster_name, cluster_dir), capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip(
+ "\n\r")
for property in additional_spark_properties.split('\n'):
- subprocess.run('echo "{0}" >>
/tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name),
shell=True, check=True)
+ subprocess.run('echo "{0}" >>
/tmp/{1}/notebook_spark-defaults_local.conf'.format(property, cluster_name),
+ shell=True, check=True)
if os.path.exists('{0}'.format(cluster_dir)):
- subprocess.run('cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
- cluster_dir), shell=True, check=True)
+ subprocess.run(
+ 'cp -f /tmp/{0}/notebook_spark-defaults_local.conf {1}spark/conf/spark-defaults.conf'.format(cluster_name,
+ cluster_dir),
+ shell=True, check=True)
if datalake_enabled == 'false':
- subprocess.run('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir), shell=True, check=True)
+ subprocess.run('cp -f /opt/spark/conf/core-site.xml {}spark/conf/'.format(cluster_dir), shell=True,
+ check=True)
else:
- subprocess.run('cp -f /opt/hadoop/etc/hadoop/core-site.xml {}hadoop/etc/hadoop/core-site.xml'.format(cluster_dir), shell=True, check=True)
+ subprocess.run(
+ 'cp -f /opt/hadoop/etc/hadoop/core-site.xml {}hadoop/etc/hadoop/core-site.xml'.format(cluster_dir),
+ shell=True, check=True)
if spark_configs and os.path.exists('{0}'.format(cluster_dir)):
- datalab_header = subprocess.run('cat /tmp/{0}/notebook_spark-defaults_local.conf | grep "^#"'.format(cluster_name),
- capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip("\n\r")
+ datalab_header = subprocess.run(
+ 'cat /tmp/{0}/notebook_spark-defaults_local.conf | grep "^#"'.format(cluster_name),
+ capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip("\n\r")
spark_configurations = ast.literal_eval(spark_configs)
new_spark_defaults = list()
- spark_defaults = subprocess.run('cat {0}spark/conf/spark-defaults.conf'.format(cluster_dir), capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip("\n\r")
+ spark_defaults = subprocess.run('cat {0}spark/conf/spark-defaults.conf'.format(cluster_dir),
+ capture_output=True, shell=True, check=True).stdout.decode('UTF-8').rstrip(
+ "\n\r")
current_spark_properties = spark_defaults.split('\n')
for param in current_spark_properties:
if param.split(' ')[0] != '#':
@@ -1537,11 +1593,14 @@ def configure_dataengine_spark(cluster_name, jars_dir, cluster_dir, datalake_ena
new_spark_defaults.append(property + ' ' + config['Properties'][property])
new_spark_defaults.append(param)
new_spark_defaults = set(new_spark_defaults)
- subprocess.run("echo '{0}' >
{1}/spark/conf/spark-defaults.conf".format(datalab_header, cluster_dir),
shell=True, check=True)
+ subprocess.run("echo '{0}' >
{1}/spark/conf/spark-defaults.conf".format(datalab_header, cluster_dir),
+ shell=True, check=True)
for prop in new_spark_defaults:
prop = prop.rstrip()
- subprocess.run('echo "{0}" >>
{1}/spark/conf/spark-defaults.conf'.format(prop, cluster_dir), shell=True,
check=True)
- subprocess.run('sed -i "/^\s*$/d"
{0}/spark/conf/spark-defaults.conf'.format(cluster_dir), shell=True, check=True)
+ subprocess.run('echo "{0}" >>
{1}/spark/conf/spark-defaults.conf'.format(prop, cluster_dir), shell=True,
+ check=True)
+ subprocess.run('sed -i "/^\s*$/d"
{0}/spark/conf/spark-defaults.conf'.format(cluster_dir), shell=True,
+ check=True)
def remount_azure_disk(creds=False, os_user='', hostname='', keyfile=''):
@@ -1556,6 +1615,7 @@ def remount_azure_disk(creds=False, os_user='', hostname='', keyfile=''):
if creds:
conn.close()
+
def ensure_right_mount_paths(creds=False, os_user='', hostname='', keyfile=''):
if creds:
global conn
@@ -1563,16 +1623,19 @@ def ensure_right_mount_paths(creds=False, os_user='', hostname='', keyfile=''):
else:
conn = datalab.fab.conn
opt_disk = conn.sudo("cat /etc/fstab | grep /opt/ | awk '{print
$1}'").stdout.split('\n')[0].split('/')[2]
- if opt_disk not in conn.sudo("lsblk | grep /opt", warn=True).stdout or
opt_disk in conn.sudo("fdisk -l | grep 'BIOS boot'").stdout:
- disk_names = conn.sudo("lsblk | grep disk | awk '{print $1}' |
sort").stdout.split('\n')
- for disk in disk_names:
- if disk != '' and disk not in conn.sudo('lsblk | grep -E
"(mnt|media)"').stdout and disk not in conn.sudo("fdisk -l | grep 'BIOS
boot'").stdout:
- conn.sudo("umount -l /opt")
- conn.sudo("mount /dev/{}1 /opt".format(disk))
- conn.sudo('sed -i "/opt/ s|/dev/{}|/dev/{}1|g"
/etc/fstab'.format(opt_disk, disk))
+ if opt_disk not in conn.sudo("lsblk | grep /opt", warn=True).stdout or
opt_disk in conn.sudo(
+ "fdisk -l | grep 'BIOS boot'").stdout:
+ disk_names = conn.sudo("lsblk | grep disk | awk '{print $1}' |
sort").stdout.split('\n')
+ for disk in disk_names:
+ if disk != '' and disk not in conn.sudo('lsblk | grep -E
"(mnt|media)"').stdout and disk not in conn.sudo(
+ "fdisk -l | grep 'BIOS boot'").stdout:
+ conn.sudo("umount -l /opt")
+ conn.sudo("mount /dev/{}1 /opt".format(disk))
+ conn.sudo('sed -i "/opt/ s|/dev/{}|/dev/{}1|g"
/etc/fstab'.format(opt_disk, disk))
if creds:
conn.close()
+
def prepare_vm_for_image(creds=False, os_user='', hostname='', keyfile=''):
if creds:
global conn
@@ -1583,7 +1646,7 @@ def prepare_vm_for_image(creds=False, os_user='', hostname='', keyfile=''):
def prepare_disk(os_user):
- if not exists(datalab.fab.conn,'/home/' + os_user + '/.ensure_dir/disk_ensured'):
+ if not exists(datalab.fab.conn, '/home/' + os_user + '/.ensure_dir/disk_ensured'):
try:
allow = False
counter = 0
@@ -1618,7 +1681,8 @@ def prepare_disk(os_user):
else:
sys.exit(1)
datalab.fab.conn.sudo('mount /dev/{}1 /opt/'.format(disk))
- datalab.fab.conn.sudo(''' bash -c "echo '/dev/{}1 /opt/
ext4 errors=remount-ro 0 1' >> /etc/fstab" '''.format(disk))
+ datalab.fab.conn.sudo(
+ ''' bash -c "echo '/dev/{}1 /opt/ ext4
errors=remount-ro 0 1' >> /etc/fstab" '''.format(disk))
datalab.fab.conn.sudo('touch /home/' + os_user +
'/.ensure_dir/disk_ensured')
except Exception as err:
@@ -1629,33 +1693,51 @@ def prepare_disk(os_user):
ensure_right_mount_paths()
+def ensure_hdinsight_secret(os_user, computational_name, cluster_password):
+ if not exists(datalab.fab.conn, '/home/{}/.ensure_dir/hdinsight_secret_ensured'.format(os_user)):
+ try:
+ datalab.fab.conn.sudo('''echo '{}-access-password="{}"' >> /home/{}/.Renviron'''
+ .format(computational_name, cluster_password, os_user))
+ datalab.fab.conn.sudo('touch /home/{}/.ensure_dir/hdinsight_secret_ensured'.format(os_user))
+ except Exception as err:
+ print('Error:', str(err))
+ sys.exit(1)
+
+
def ensure_local_spark(os_user, spark_link, spark_version, hadoop_version, local_spark_path):
- if not exists(datalab.fab.conn,'/home/' + os_user + '/.ensure_dir/local_spark_ensured'):
+ if not exists(datalab.fab.conn, '/home/' + os_user + '/.ensure_dir/local_spark_ensured'):
try:
if os.environ['azure_datalake_enable'] == 'false':
- datalab.fab.conn.sudo('wget ' + spark_link + ' -O /tmp/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz')
- datalab.fab.conn.sudo('tar -zxvf /tmp/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz -C /opt/')
- datalab.fab.conn.sudo('mv /opt/spark-' + spark_version + '-bin-hadoop' + hadoop_version + ' ' + local_spark_path)
+ datalab.fab.conn.sudo(
+ 'wget ' + spark_link + ' -O /tmp/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz')
+ datalab.fab.conn.sudo(
+ 'tar -zxvf /tmp/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz -C /opt/')
+ datalab.fab.conn.sudo(
+ 'mv /opt/spark-' + spark_version + '-bin-hadoop' + hadoop_version + ' ' + local_spark_path)
datalab.fab.conn.sudo('chown -R ' + os_user + ':' + os_user + ' ' + local_spark_path)
datalab.fab.conn.sudo('touch /home/' + os_user + '/.ensure_dir/local_spark_ensured')
else:
# Downloading Spark without Hadoop
- datalab.fab.conn.sudo('wget https://archive.apache.org/dist/spark/spark-{0}/spark-{0}-bin-without-hadoop.tgz -O /tmp/spark-{0}-bin-without-hadoop.tgz'
+ datalab.fab.conn.sudo(
+ 'wget https://archive.apache.org/dist/spark/spark-{0}/spark-{0}-bin-without-hadoop.tgz -O /tmp/spark-{0}-bin-without-hadoop.tgz'
.format(spark_version))
datalab.fab.conn.sudo('tar -zxvf /tmp/spark-{}-bin-without-hadoop.tgz -C /opt/'.format(spark_version))
datalab.fab.conn.sudo('mv /opt/spark-{}-bin-without-hadoop {}'.format(spark_version, local_spark_path))
datalab.fab.conn.sudo('chown -R {0}:{0} {1}'.format(os_user, local_spark_path))
# Downloading Hadoop
hadoop_version = '3.0.0'
- datalab.fab.conn.sudo('wget https://archive.apache.org/dist/hadoop/common/hadoop-{0}/hadoop-{0}.tar.gz -O /tmp/hadoop-{0}.tar.gz'
+ datalab.fab.conn.sudo(
+ 'wget https://archive.apache.org/dist/hadoop/common/hadoop-{0}/hadoop-{0}.tar.gz -O /tmp/hadoop-{0}.tar.gz'
.format(hadoop_version))
datalab.fab.conn.sudo('tar -zxvf /tmp/hadoop-{0}.tar.gz -C /opt/'.format(hadoop_version))
datalab.fab.conn.sudo('mv /opt/hadoop-{0} /opt/hadoop/'.format(hadoop_version))
datalab.fab.conn.sudo('chown -R {0}:{0} /opt/hadoop/'.format(os_user))
# Configuring Hadoop and Spark
java_path = datalab.common_lib.find_java_path_remote()
- datalab.fab.conn.sudo('echo "export JAVA_HOME={}" >>
/opt/hadoop/etc/hadoop/hadoop-env.sh'.format(java_path))
- datalab.fab.conn.sudo("""echo 'export
HADOOP_CLASSPATH="$HADOOP_HOME/share/hadoop/tools/lib/*"' >>
/opt/hadoop/etc/hadoop/hadoop-env.sh""")
+ datalab.fab.conn.sudo(
+ 'echo "export JAVA_HOME={}" >>
/opt/hadoop/etc/hadoop/hadoop-env.sh'.format(java_path))
+ datalab.fab.conn.sudo(
+ """echo 'export
HADOOP_CLASSPATH="$HADOOP_HOME/share/hadoop/tools/lib/*"' >>
/opt/hadoop/etc/hadoop/hadoop-env.sh""")
datalab.fab.conn.sudo('echo "export HADOOP_HOME=/opt/hadoop/"
>> /opt/spark/conf/spark-env.sh')
datalab.fab.conn.sudo('echo "export SPARK_HOME=/opt/spark/" >>
/opt/spark/conf/spark-env.sh')
spark_dist_classpath =
datalab.fab.conn.sudo('/opt/hadoop/bin/hadoop classpath').stdout
@@ -1667,34 +1749,55 @@ def ensure_local_spark(os_user, spark_link, spark_version, hadoop_version, local
sys.exit(1)
-def install_dataengine_spark(cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, os_user, datalake_enabled):
+def install_dataengine_spark(cluster_name, spark_link, spark_version, hadoop_version, cluster_dir, os_user,
+ datalake_enabled):
try:
if datalake_enabled == 'false':
- subprocess.run('wget ' + spark_link + ' -O /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz', shell=True, check=True)
- subprocess.run('tar -zxvf /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz -C /opt/', shell=True, check=True)
- subprocess.run('mv /opt/spark-' + spark_version + '-bin-hadoop' + hadoop_version + ' ' + cluster_dir + 'spark/', shell=True, check=True)
+ subprocess.run(
+ 'wget ' + spark_link + ' -O /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz',
+ shell=True, check=True)
+ subprocess.run(
+ 'tar -zxvf /tmp/' + cluster_name + '/spark-' + spark_version + '-bin-hadoop' + hadoop_version + '.tgz -C /opt/',
+ shell=True, check=True)
+ subprocess.run(
+ 'mv /opt/spark-' + spark_version + '-bin-hadoop' + hadoop_version + ' ' + cluster_dir + 'spark/',
+ shell=True, check=True)
subprocess.run('chown -R ' + os_user + ':' + os_user + ' ' + cluster_dir + 'spark/', shell=True, check=True)
else:
# Downloading Spark without Hadoop
- subprocess.run('wget https://archive.apache.org/dist/spark/spark-{0}/spark-{0}-bin-without-hadoop.tgz -O /tmp/{1}/spark-{0}-bin-without-hadoop.tgz'
- .format(spark_version, cluster_name), shell=True, check=True)
- subprocess.run('tar -zxvf /tmp/' + cluster_name + '/spark-{}-bin-without-hadoop.tgz -C /opt/'.format(spark_version), shell=True, check=True)
- subprocess.run('mv /opt/spark-{}-bin-without-hadoop {}spark/'.format(spark_version, cluster_dir), shell=True, check=True)
+ subprocess.run(
+ 'wget https://archive.apache.org/dist/spark/spark-{0}/spark-{0}-bin-without-hadoop.tgz -O /tmp/{1}/spark-{0}-bin-without-hadoop.tgz'
+ .format(spark_version, cluster_name), shell=True, check=True)
+ subprocess.run(
+ 'tar -zxvf /tmp/' + cluster_name + '/spark-{}-bin-without-hadoop.tgz -C /opt/'.format(spark_version),
+ shell=True, check=True)
+ subprocess.run('mv /opt/spark-{}-bin-without-hadoop {}spark/'.format(spark_version, cluster_dir),
+ shell=True, check=True)
subprocess.run('chown -R {0}:{0} {1}/spark/'.format(os_user, cluster_dir), shell=True, check=True)
# Downloading Hadoop
hadoop_version = '3.0.0'
- subprocess.run('wget https://archive.apache.org/dist/hadoop/common/hadoop-{0}/hadoop-{0}.tar.gz -O /tmp/{1}/hadoop-{0}.tar.gz'
- .format(hadoop_version, cluster_name), shell=True, check=True)
- subprocess.run('tar -zxvf /tmp/' + cluster_name + '/hadoop-{0}.tar.gz -C /opt/'.format(hadoop_version), shell=True, check=True)
+ subprocess.run(
+ 'wget https://archive.apache.org/dist/hadoop/common/hadoop-{0}/hadoop-{0}.tar.gz -O /tmp/{1}/hadoop-{0}.tar.gz'
+ .format(hadoop_version, cluster_name), shell=True, check=True)
+ subprocess.run('tar -zxvf /tmp/' + cluster_name + '/hadoop-{0}.tar.gz -C /opt/'.format(hadoop_version),
+ shell=True, check=True)
subprocess.run('mv /opt/hadoop-{0} {1}hadoop/'.format(hadoop_version, cluster_dir), shell=True, check=True)
subprocess.run('chown -R {0}:{0} {1}hadoop/'.format(os_user, cluster_dir), shell=True, check=True)
# Configuring Hadoop and Spark
java_path = datalab.common_lib.find_java_path_local()
- subprocess.run('echo "export JAVA_HOME={}" >>
{}hadoop/etc/hadoop/hadoop-env.sh'.format(java_path, cluster_dir), shell=True,
check=True)
- subprocess.run("""echo 'export
HADOOP_CLASSPATH="$HADOOP_HOME/share/hadoop/tools/lib/*"' >>
{}hadoop/etc/hadoop/hadoop-env.sh""".format(cluster_dir), shell=True,
check=True)
- subprocess.run('echo "export HADOOP_HOME={0}hadoop/" >>
{0}spark/conf/spark-env.sh'.format(cluster_dir), shell=True, check=True)
- subprocess.run('echo "export SPARK_HOME={0}spark/" >>
{0}spark/conf/spark-env.sh'.format(cluster_dir), shell=True, check=True)
- spark_dist_classpath = subprocess.run('{}hadoop/bin/hadoop
classpath'.format(cluster_dir), capture_output=True, shell=True,
check=True).stdout.decode('UTF-8').rstrip("\n\r")
+ subprocess.run(
+ 'echo "export JAVA_HOME={}" >>
{}hadoop/etc/hadoop/hadoop-env.sh'.format(java_path, cluster_dir),
+ shell=True, check=True)
+ subprocess.run(
+ """echo 'export
HADOOP_CLASSPATH="$HADOOP_HOME/share/hadoop/tools/lib/*"' >>
{}hadoop/etc/hadoop/hadoop-env.sh""".format(
+ cluster_dir), shell=True, check=True)
+ subprocess.run('echo "export HADOOP_HOME={0}hadoop/" >>
{0}spark/conf/spark-env.sh'.format(cluster_dir),
+ shell=True, check=True)
+ subprocess.run('echo "export SPARK_HOME={0}spark/" >>
{0}spark/conf/spark-env.sh'.format(cluster_dir),
+ shell=True, check=True)
+ spark_dist_classpath = subprocess.run('{}hadoop/bin/hadoop
classpath'.format(cluster_dir),
+ capture_output=True,
shell=True, check=True).stdout.decode(
+ 'UTF-8').rstrip("\n\r")
subprocess.run('echo "export SPARK_DIST_CLASSPATH={}" >>
{}spark/conf/spark-env.sh'.format(
spark_dist_classpath, cluster_dir), shell=True, check=True)
except:
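For reference, a minimal standalone sketch of what the new ensure_hdinsight_secret() helper above produces on the notebook VM. The real helper runs the same steps remotely through datalab.fab.conn.sudo; the home directory, user and cluster names below are hypothetical stand-ins.

#!/usr/bin/python3
# Sketch only: mirrors ensure_hdinsight_secret() from actions_lib.py above,
# but writes locally instead of over a Fabric connection.
import os
import tempfile

def write_hdinsight_secret(home, computational_name, cluster_password):
    # Append the per-cluster password to ~/.Renviron; RStudio loads that file
    # at session start, so R reads the secret via Sys.getenv() and it never
    # appears in the generated connection string itself.
    marker_dir = os.path.join(home, '.ensure_dir')
    marker = os.path.join(marker_dir, 'hdinsight_secret_ensured')
    if not os.path.exists(marker):
        with open(os.path.join(home, '.Renviron'), 'a') as renviron:
            renviron.write('{}-access-password="{}"\n'.format(computational_name, cluster_password))
        os.makedirs(marker_dir, exist_ok=True)
        open(marker, 'w').close()  # same idempotency marker the patch uses

demo_home = tempfile.mkdtemp()  # stand-in for /home/<os_user>
write_hdinsight_secret(demo_home, 'comp1', 'example-password')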
diff --git a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
index cc0dcfa11..7fae27104 100644
--- a/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
+++ b/infrastructure-provisioning/src/general/scripts/azure/dataengine-service_configure.py
@@ -40,6 +40,14 @@ parser.add_argument('--uuid', type=str, default='')
parser.add_argument('--access_password', type=str, default='')
args = parser.parse_args()
+
+def add_notebook_secret(resource_group_name, instance_name, os_user, keyfile, computational_name, cluster_password):
+ private_ip = AzureMeta.get_private_ip_address(resource_group_name, instance_name)
+ global conn
+ conn = datalab.fab.init_datalab_connection(private_ip, os_user, keyfile)
+ datalab.actions_lib.ensure_hdinsight_secret(os_user, computational_name, cluster_password)
+
+
if __name__ == "__main__":
try:
AzureMeta = datalab.meta_lib.AzureMeta()
@@ -56,21 +64,36 @@ if __name__ == "__main__":
hdinsight_conf['project_tag'] = hdinsight_conf['project_name']
hdinsight_conf['endpoint_name'] = os.environ['endpoint_name']
hdinsight_conf['endpoint_tag'] = hdinsight_conf['endpoint_name']
- hdinsight_conf['key_name'] = os.environ['conf_key_name']
hdinsight_conf['hdinsight_master_instance_type'] = os.environ['hdinsight_master_instance_type']
hdinsight_conf['hdinsight_slave_instance_type'] = os.environ['hdinsight_slave_instance_type']
+ hdinsight_conf['key_path'] = '{}/{}.pem'.format(os.environ['conf_key_dir'],
+ os.environ['conf_key_name'])
if 'computational_name' in os.environ:
hdinsight_conf['computational_name'] = os.environ['computational_name']
else:
hdinsight_conf['computational_name'] = ''
hdinsight_conf['cluster_name'] = '{}-{}-{}-des-{}'.format(hdinsight_conf['service_base_name'],
- hdinsight_conf['project_name'],
- hdinsight_conf['endpoint_name'],
- hdinsight_conf['computational_name'])
+ hdinsight_conf['project_name'],
+ hdinsight_conf['endpoint_name'],
+ hdinsight_conf['computational_name'])
hdinsight_conf['cluster_url'] = 'https://{}.azurehdinsight.net'.format(hdinsight_conf['cluster_name'])
hdinsight_conf['cluster_jupyter_url'] = '{}/jupyter/'.format(hdinsight_conf['cluster_url'])
hdinsight_conf['cluster_sparkhistory_url'] = '{}/sparkhistory/'.format(hdinsight_conf['cluster_url'])
hdinsight_conf['cluster_zeppelin_url'] = '{}/zeppelin/'.format(hdinsight_conf['cluster_url'])
+
+ if os.environ["application"] == "rstudio":
+ add_notebook_secret(hdinsight_conf['resource_group_name'], os.environ["notebook_instance_name"],
+ os.environ["conf_os_user"], hdinsight_conf['key_path'],
+ hdinsight_conf['computational_name'], args.access_password)
+ hdinsight_conf['rstudio_livy_connection'] = 'library(sparklyr); ' \
+ 'sc <- spark_connect(master = "{}/livy/", ' \
+ 'version = "3.1.1", method = "livy", ' \
+ 'config = livy_config(username = "{}", ' \
+ 'password = Sys.getenv("{}-access-password")))' \
+ .format(hdinsight_conf['cluster_url'], os.environ["conf_os_user"], hdinsight_conf['computational_name'])
+ else:
+ hdinsight_conf['rstudio_livy_connection'] = ''
+
logging.info('[SUMMARY]')
logging.info("Service base name:
{}".format(hdinsight_conf['service_base_name']))
logging.info("Region: {}".format(hdinsight_conf['region']))
@@ -98,7 +121,8 @@ if __name__ == "__main__":
"url": hdinsight_conf['cluster_jupyter_url']},
{"description": "Zeppelin notebook",
"url": hdinsight_conf['cluster_zeppelin_url']}
- ]
+ ],
+ "Connection_string":
hdinsight_conf['rstudio_livy_connection']
}
result.write(json.dumps(res))
except Exception as err:
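To make the new Connection_string summary field concrete, here is a small sketch that formats the same sparklyr/Livy snippet the script builds above, using hypothetical values in place of hdinsight_conf['cluster_url'], conf_os_user and the computational name.

#!/usr/bin/python3
# Sketch only: reproduces the rstudio_livy_connection formatting from
# dataengine-service_configure.py with hypothetical values.
cluster_url = 'https://sbn-prj-ep-des-comp1.azurehdinsight.net'  # hypothetical
os_user = 'datalab-user'                                         # hypothetical
computational_name = 'comp1'                                     # hypothetical

connection = ('library(sparklyr); '
              'sc <- spark_connect(master = "{}/livy/", '
              'version = "3.1.1", method = "livy", '
              'config = livy_config(username = "{}", '
              'password = Sys.getenv("{}-access-password")))').format(
    cluster_url, os_user, computational_name)

# The R code references the password only through Sys.getenv(), which resolves
# against the .Renviron entry written by ensure_hdinsight_secret().
print(connection)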
diff --git a/infrastructure-provisioning/src/general/scripts/azure/rstudio_install_dataengine-service_kernels.py b/infrastructure-provisioning/src/general/scripts/azure/rstudio_install_dataengine-service_kernels.py
new file mode 100644
index 000000000..06d884267
--- /dev/null
+++ b/infrastructure-provisioning/src/general/scripts/azure/rstudio_install_dataengine-service_kernels.py
@@ -0,0 +1,26 @@
+#!/usr/bin/python3
+
+# *****************************************************************************
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# ******************************************************************************
+
+
+if __name__ == "__main__":
+ pass
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]