This is an automated email from the ASF dual-hosted git repository.
knarendran pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/fluo-muchos.git
The following commit(s) were added to refs/heads/main by this push:
new 3516729 Add support for multiple VMSS on Azure (#394)
3516729 is described below
commit 3516729b303acefb2898e557fb6d4ab643a38074
Author: Karthick Narendran <[email protected]>
AuthorDate: Tue May 11 10:13:26 2021 +0100
Add support for multiple VMSS on Azure (#394)
Co-authored-by: Karthick Narendran <[email protected]>
---
README.md | 2 +
ansible/azure_terminate.yml | 17 ++
ansible/library/azure_host_role_map.py | 92 +++++++++
...ure_rm_virtualmachinescaleset_nic_list_facts.py | 3 -
.../roles/azure/tasks/assign_msi_multiple_vmss.yml | 40 ++++
ansible/roles/azure/tasks/create_multiple_vmss.yml | 228 +++++++++++++++++++++
ansible/roles/azure/tasks/main.yml | 7 +-
conf/.gitignore | 2 +
conf/azure_multiple_vmss_vars.yml.example | 94 +++++++++
conf/muchos.props.example | 2 +
docs/azure-multiple-vmss.md | 34 +++
lib/muchos/azure.py | 136 ++++++++++++
lib/muchos/config/azure.py | 16 ++
13 files changed, 669 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index 04c300c..390793b 100644
--- a/README.md
+++ b/README.md
@@ -168,6 +168,8 @@ Under the `azure` section, edit following values as per
your configuration:
* `vnet` to provide the name of the VNET that your cluster nodes should use. A
new VNET with this name will be
created if it doesn't already exist
* `subnet` to provide a name for the subnet within which the cluster resources
will be deployed
+* `use_multiple_vmss` allows you to configure VMs with different CPU, memory,
disks for leaders and workers. To
+ know more about this feature, please follow the
[doc](docs/azure-multiple-vmss.md).
* `azure_image_reference` allows you to specify the CentOS image SKU in the
format as shown below. To configure
CentOS 8.x, please follow [these steps](docs/azure-image-reference.md).
```bash
diff --git a/ansible/azure_terminate.yml b/ansible/azure_terminate.yml
index 2928924..b7edc43 100644
--- a/ansible/azure_terminate.yml
+++ b/ansible/azure_terminate.yml
@@ -76,6 +76,23 @@
name: "{{ vmss_name }}"
remove_on_absent: all
state: absent
+ when: use_multiple_vmss is not defined or not use_multiple_vmss
+
+ - name: Include azure_multiple_vmss_vars.yml definition
+ include_vars:
+ file: "{{ deploy_path }}/conf/azure_multiple_vmss_vars.yml"
+ name: azure_multiple_vmss_vars
+ when: use_multiple_vmss
+
+ - name: Delete VM Scale Sets
+ azure_rm_virtualmachinescaleset:
+ resource_group: "{{ resource_group }}"
+ name: "{{ vmss_name }}-{{ item.name_suffix }}"
+ remove_on_absent: all
+ state: absent
+ with_items:
+ - "{{ azure_multiple_vmss_vars.vars_list }}"
+ when: use_multiple_vmss
- name: Delete azure proxy virtual machine if one was created
azure_rm_virtualmachine:
diff --git a/ansible/library/azure_host_role_map.py
b/ansible/library/azure_host_role_map.py
new file mode 100644
index 0000000..4d52c2c
--- /dev/null
+++ b/ansible/library/azure_host_role_map.py
@@ -0,0 +1,92 @@
+#!/usr/bin/python3
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from ansible.module_utils.basic import AnsibleModule
+from collections import defaultdict
+import os
+from os.path import join
+
+deploy_path = os.environ.get("MUCHOS_HOME")
+
+
+def label(hosts, labels):
+ hld = defaultdict(list)
+ for i, host in enumerate(hosts):
+ for tmpLabel, n in labels.items():
+ if i < n:
+ hld[host].append(tmpLabel)
+ return hld
+
+
+def stringify(L, hns):
+ # Flatten the list of dicts
+ labels = {k: v for d in L for k, v in d.items()}
+ label_string_dict = {k: ",".join(v) for k, v in labels.items()}
+ label_list = [
+ "{} = {} {}".format(k, v, hns[k]) for k, v in label_string_dict.items()
+ ]
+ return "\n".join(label_list)
+
+
+def main():
+
+ fields = {
+ "hosts": {"required": True, "type": "list"},
+ "vars_list": {"required": True, "type": "dict"},
+ "cluster_name": {"required": True, "type": "str"},
+ }
+
+ module = AnsibleModule(argument_spec=fields)
+ vars_list = module.params["vars_list"]
+ hosts = module.params["hosts"]
+ cluster_name = module.params["cluster_name"]
+ mp = {x["name_suffix"]: x["roles"] for x in vars_list["vars_list"]}
+ ns_mp = {
+ cluster_name + "-" + x["name_suffix"]: x.get("nameservice_id", "")
+ for x in vars_list["vars_list"]
+ }
+
+ hd = defaultdict(list)
+ for host in hosts:
+ hd[host["key"]].append(host["value"])
+
+ label_tuples = {
+ cluster_name + "-" + k: (hd[cluster_name + "-" + k], v)
+ for k, v in mp.items()
+ }
+
+ hdfs_ns_tuples = {h: ns_mp[k] for k, v in hd.items() for h in v}
+
+ label_lists = [label(*v) for k, v in label_tuples.items()]
+ result_string = str(stringify(label_lists, hdfs_ns_tuples))
+
+ vmss_file = open(join(deploy_path, "conf/azure_vmss_to_hosts.conf"), "w")
+ for key in hd:
+ vmss_file.write("[" + key.replace("-", "_") + "]\n")
+ for value in hd[key]:
+ vmss_file.write(value)
+ vmss_file.write("\n")
+ vmss_file.write("\n")
+ vmss_file.write("\n")
+ vmss_file.close()
+
+ module.exit_json(result=result_string)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/ansible/library/azure_rm_virtualmachinescaleset_nic_list_facts.py
b/ansible/library/azure_rm_virtualmachinescaleset_nic_list_facts.py
index 2244b5a..cc20ea7 100644
--- a/ansible/library/azure_rm_virtualmachinescaleset_nic_list_facts.py
+++ b/ansible/library/azure_rm_virtualmachinescaleset_nic_list_facts.py
@@ -52,9 +52,6 @@ options:
extends_documentation_fragment:
- azure
-author:
- - "Min Pae (@sputnik13)"
-
'''
EXAMPLES = '''
diff --git a/ansible/roles/azure/tasks/assign_msi_multiple_vmss.yml
b/ansible/roles/azure/tasks/assign_msi_multiple_vmss.yml
new file mode 100644
index 0000000..c11e210
--- /dev/null
+++ b/ansible/roles/azure/tasks/assign_msi_multiple_vmss.yml
@@ -0,0 +1,40 @@
+---
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# These Ansible tasks only run on the client machine where Muchos runs
+# They assign a user-assigned managed identity to each of the multiple
+# VMSS deployments defined in azure_multiple_vmss_vars.yml, so that the
+# VM instances in each VMSS can access Azure resources such as ADLS Gen2
+#
+
+- name: Assign User assigned Identity to Multiple VMSS
+ azure_rm_resource:
+ resource_group: "{{ resource_group }}"
+ provider: Compute
+ resource_type: virtualMachineScaleSets
+ resource_name: "{{ vmss_name }}-{{ item.name_suffix }}"
+ api_version: '2019-03-01'
+ body:
+ location: "{{ location }}"
+ identity:
+ type: UserAssigned
+ userAssignedIdentities: "{{ UserAssignedIdentityArr|join('') }}"
+ loop:
+ "{{ azure_multiple_vmss_vars.vars_list }}"
diff --git a/ansible/roles/azure/tasks/create_multiple_vmss.yml
b/ansible/roles/azure/tasks/create_multiple_vmss.yml
new file mode 100644
index 0000000..3bac89b
--- /dev/null
+++ b/ansible/roles/azure/tasks/create_multiple_vmss.yml
@@ -0,0 +1,228 @@
+---
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# These Ansible tasks only run on the client machine where Muchos runs
+# At a high level, the various sections in this file do the following:
+# 1. Create (if not already existing): an Azure resource group, virtual
network / subnet
+# 2. Optionally (if the user specified) create a VM and related resources to
use as a proxy host
+# 3. Create the Azure VMSS to support the nodes for use with Muchos
+# 4. Automatically populate the hosts file and associated [nodes] section in
muchos.props
+#
+# For 1 & 2 it uses create_common_resources.yml & create_optional_proxy.yml
+
+- name: Include azure_multiple_vmss_vars.yml
+ include_vars:
+ file: "{{ deploy_path }}/conf/azure_multiple_vmss_vars.yml"
+ name: azure_multiple_vmss_vars
+
+- name: Create Scale Set
+ azure_rm_virtualmachinescaleset:
+ resource_group: "{{ resource_group }}"
+ location: "{{ location }}"
+ name: "{{ vmss_name }}-{{ item.name_suffix }}"
+ vm_size: "{{ item.sku }}"
+ admin_username: "{{ admin_username }}"
+ ssh_password_enabled: false
+ ssh_public_keys:
+ - path: /home/{{ admin_username }}/.ssh/authorized_keys
+ key_data: "{{ lookup('file', '~/.ssh/id_rsa.pub') }}"
+ capacity: "{{ item.capacity }}"
+ single_placement_group: "{{ False if item.capacity > 100 else omit }}"
+ virtual_network_name: "{{ vnet }}"
+ subnet_name: "{{ subnet }}"
+ upgrade_policy: Manual
+ tier: Standard
+ managed_disk_type: "{{ osdisk_sku }}"
+ os_disk_caching: ReadWrite
+ enable_accelerated_networking: "{{ accnet_capable }}"
+ image:
+ offer: "{{ image_offer if image_offer else omit }}"
+ publisher: "{{ image_publisher if image_publisher else omit }}"
+ sku: "{{ image_sku if image_sku else omit }}"
+ version: "{{ image_version if image_version else omit }}"
+ id: "{{ image_id if image_id else omit }}"
+ data_disks: |
+ {%- set data_disks = [] -%}
+ {%- for lun in range(item.data_disk_count) -%}
+ {%- set _ = data_disks.append({'lun': lun, 'disk_size_gb':
item.data_disk_size_gb, 'managed_disk_type': item.disk_sku }) -%}
+ {%- endfor -%}
+ {{ data_disks }}
+ with_items:
+ - "{{ azure_multiple_vmss_vars.vars_list }}"
+ vars:
+ - image_offer: "{{ azure_image_reference.split('|')[0] }}"
+ - image_publisher: "{{ azure_image_reference.split('|')[1] }}"
+ - image_sku: "{{ azure_image_reference.split('|')[2] }}"
+ - image_version: "{{ azure_image_reference.split('|')[3] }}"
+ - image_id: "{{ azure_image_reference.split('|')[4] }}"
+ - accnet_capable: "{{ True if item.sku in accnet_capable_skus else False }}"
+ - osdisk_sku: "{{ 'Premium_LRS' if item.sku in premiumio_capable_skus else
'Standard_LRS' }}"
+ register: _create_clusters
+ async: 600
+ poll: 0
+ tags: create_multiple_vmss
+
+- name: Wait
+ async_status:
+ jid: "{{ item.ansible_job_id }}"
+ register: _jobs
+ until: _jobs.finished
+ delay: 15
+ retries: 300
+ with_items: "{{ _create_clusters.results }}"
+
+- name: Get VMSS instances
+ azure_rm_virtualmachinescalesetinstance_info:
+ resource_group: "{{ resource_group }}"
+ vmss_name: "{{ vmss_name }}-{{ item.name_suffix }}"
+ register: _vmss_instances
+ with_items:
+ - "{{ azure_multiple_vmss_vars.vars_list }}"
+ async: 600
+ poll: 0
+
+- name: Get VMSS nic list
+ azure_rm_virtualmachinescaleset_nic_list_facts:
+ resource_group: "{{ resource_group }}"
+ vmss_name: "{{ vmss_name }}-{{ item.name_suffix }}"
+ register: _vmss_nic_list
+ with_items:
+ - "{{ azure_multiple_vmss_vars.vars_list }}"
+ async: 600
+ poll: 0
+
+- name: Wait for VMSS instance list operations
+ async_status:
+ jid: "{{ item.ansible_job_id }}"
+ register: vmss_instances
+ until: vmss_instances.finished
+ delay: 15
+ retries: 300
+ with_items: "{{ _vmss_instances.results }}"
+
+- name: Wait for NIC list operations
+ async_status:
+ jid: "{{ item.ansible_job_id }}"
+ register: vmss_nic_list
+ until: vmss_nic_list.finished
+ delay: 15
+ retries: 300
+ with_items: "{{ _vmss_nic_list.results }}"
+
+- name: Get VM hostname to IP mapping
+ set_fact:
+ hostname_ip_pairs: |
+ {%- set vmname_ips = [] -%}
+ {%- if azure_proxy_host is defined and azure_proxy_host -%}
+ {%- set _ = vmname_ips.append({'name': azure_proxy_host, 'ip':
azure_proxy_public_ip.state.ip_address }) -%}
+ {%- endif -%}
+ {%- set vmid_names = {} -%}
+ {%- for vmss in vmss_instances.results -%}
+ {%- for instance in vmss.instances -%}
+ {%- set _ = vmid_names.__setitem__(instance.id,
instance.name.replace('_','-')) -%}
+ {%- endfor -%}
+ {%- endfor -%}
+ {%- set vmid_ips = {} -%}
+ {%- for vmss in vmss_nic_list.results -%}
+ {%- for interface in vmss.networkinterfaces -%}
+ {%- if interface.virtualMachine is defined -%}
+ {%- set _ = vmid_ips.__setitem__(interface.virtualMachine.id,
interface.ipConfigurations[0].privateIPAddress) -%}
+ {%- endif -%}
+ {%- endfor -%}
+ {%- endfor -%}
+ {%- for vmid in vmid_names -%}
+ {%- set _ = vmname_ips.append({'name': vmid_names[vmid], 'ip':
vmid_ips[vmid]}) -%}
+ {%- endfor -%}
+ {{ vmname_ips }}
+
+- name: Ensures hosts sub-dir exists
+ file:
+ path: "{{ deploy_path }}/conf/hosts/"
+ state: directory
+ recurse: yes
+
+- name: Ensures host_vars sub-dir exists
+ file:
+ path: "{{ deploy_path }}/ansible/host_vars/"
+ state: directory
+ recurse: yes
+
+- name: Write hosts file
+ template:
+ src: hostname_ip_mappings.j2
+ dest: "{{ deploy_path }}/conf/hosts/{{ vmss_name }}"
+ mode: 0644
+
+- name: Get vmss to host map
+ set_fact:
+ vmss_host_pairs: |
+ {%- set vmss_host = [] -%}
+ {%- for vmss in vmss_instances.results -%}
+ {%- for instance in vmss.instances -%}
+ {%- set _ = vmss_host.append({'key':
vmss.invocation.module_args.vmss_name, 'value':
instance.name.replace('_','-')}) -%}
+ {%- endfor -%}
+ {%- endfor -%}
+ {{ vmss_host }}
+
+- name: Clear section
+ ini_file:
+ path: "{{ deploy_path }}/conf/muchos.props"
+ section: "nodes"
+ state: absent
+
+- name: Recreate section
+ ini_file:
+ path: "{{ deploy_path }}/conf/muchos.props"
+ section: "nodes"
+ option: "#host0"
+ value: "service"
+ state: present
+
+- name: add azure proxy host
+ lineinfile:
+ path: "{{ deploy_path }}/conf/muchos.props"
+ line: "{{ azure_proxy_host }} = client"
+ when: azure_proxy_host is defined and azure_proxy_host and azure_proxy_host
!= None
+
+- name: Get host-role assignments
+ azure_host_role_map:
+ hosts: "{{ vmss_host_pairs }}"
+ vars_list: "{{ azure_multiple_vmss_vars }}"
+ cluster_name: "{{ vmss_name }}"
+ register: assignments
+
+- name: Write role assignments to muchos.props
+ lineinfile:
+ path: "{{ deploy_path }}/conf/muchos.props"
+ line: "{{ assignments.result }}"
+
+- name: Change proxy hostname to azure proxy host in muchos.props
+ lineinfile:
+ path: "{{ deploy_path }}/conf/muchos.props"
+ regexp: '^proxy_hostname\s*=\s*'
+ line: "proxy_hostname = {{ azure_proxy_host }}"
+ when: azure_proxy_host is defined and azure_proxy_host and azure_proxy_host
!= None
+
+- name: Change proxy hostname to first node in vmss in muchos.props
+ lineinfile:
+ path: "{{ deploy_path }}/conf/muchos.props"
+ regexp: '^proxy_hostname\s*=\s*'
+ line: "proxy_hostname = {{ vmss_host_pairs[0].value }}"
+ when: not (azure_proxy_host is defined and azure_proxy_host and
azure_proxy_host != None)
+
+
diff --git a/ansible/roles/azure/tasks/main.yml
b/ansible/roles/azure/tasks/main.yml
index d252833..0b5dab9 100644
--- a/ansible/roles/azure/tasks/main.yml
+++ b/ansible/roles/azure/tasks/main.yml
@@ -30,7 +30,12 @@
when: use_adlsg2
- import_tasks: create_optional_proxy.yml
- import_tasks: create_vmss.yml
+ when: use_multiple_vmss is not defined or not use_multiple_vmss
- import_tasks: assign_msi_single_vmss.yml
- when: use_adlsg2
+ when: (use_multiple_vmss is not defined or not use_multiple_vmss) and
use_adlsg2
+- import_tasks: create_multiple_vmss.yml
+ when: use_multiple_vmss
+- import_tasks: assign_msi_multiple_vmss.yml
+ when: use_multiple_vmss and use_adlsg2
- import_tasks: create_log_analytics_ws.yml
when: az_oms_integration_needed and (az_logs_id is not defined or (not
az_logs_id) or az_logs_id == None)
diff --git a/conf/.gitignore b/conf/.gitignore
index 0e1a2fc..2181aa7 100644
--- a/conf/.gitignore
+++ b/conf/.gitignore
@@ -2,3 +2,5 @@
/hosts/*
/keys
/user_data
+/azure_vmss_to_hosts.conf
+/azure_multiple_vmss_vars.yml
diff --git a/conf/azure_multiple_vmss_vars.yml.example
b/conf/azure_multiple_vmss_vars.yml.example
new file mode 100644
index 0000000..71c94f4
--- /dev/null
+++ b/conf/azure_multiple_vmss_vars.yml.example
@@ -0,0 +1,94 @@
+---
+vars_list:
+# The below roles are required when HA is enabled (i.e. hdfs_ha = True)
+ - name_suffix: vmss1
+ sku: Standard_D4s_v3
+ perf_profile: azd8s
+ data_disk_count: 4
+ disk_sku: Premium_LRS
+ data_disk_size_gb: 512
+ capacity: 4
+ roles:
+ namenode: 1
+ resourcemanager: 1
+ accumulomaster: 1
+ zookeeper: 1
+ journalnode: 2
+ zkfc: 1
+ client: 4
+ - name_suffix: vmss2
+ sku: Standard_D4s_v3
+ perf_profile: perf-small
+ data_disk_count: 4
+ disk_sku: Standard_LRS
+ data_disk_size_gb: 512
+ capacity: 4
+ roles:
+ zookeeper: 2
+ metrics: 1
+ journalnode: 1
+ namenode: 1
+ zkfc: 1
+ accumulomaster: 1
+ resourcemanager: 1
+ client: 4
+ - name_suffix: vmss3
+ sku: Standard_D4s_v3
+ perf_profile: azd8s
+ data_disk_count: 8
+ disk_sku: Standard_LRS
+ data_disk_size_gb: 1024
+ capacity: 4
+ roles:
+ worker: 4
+
+ # The below roles are required when HA is not enabled (i.e. hdfs_ha = False)
+ - name_suffix: vmss4
+ sku: Standard_D4s_v3
+ perf_profile: azd8s
+ data_disk_count: 4
+ disk_sku: Premium_LRS
+ data_disk_size_gb: 512
+ capacity: 3
+ roles:
+ namenode: 1
+ resourcemanager: 1
+ accumulomaster: 1
+ zookeeper: 1
+ client: 3
+ - name_suffix: vmss5
+ sku: Standard_D4s_v3
+ perf_profile: azd8s
+ data_disk_count: 4
+ disk_sku: Premium_LRS
+ data_disk_size_gb: 512
+ capacity: 1
+ roles:
+ metrics: 1
+ client: 1
+ - name_suffix: vmss6
+ sku: Standard_D8s_v3
+ perf_profile: azd8s
+ data_disk_count: 8
+ disk_sku: Standard_LRS
+ data_disk_size_gb: 1024
+ capacity: 3
+ roles:
+ worker: 3
+
+ # The below VMSS definition is provided just as a sample to show how we
+ # can define per-VMSS mount root and disk path and pattern definitions
+ # Using ephemeral storage like the one shown below should not be used for
+ # any case where data persistence is required
+ - name_suffix: vmss7
+ sku: Standard_L16s_v2
+ perf_profile: azd8s
+ azure_disk_device_path: /dev
+ azure_disk_device_pattern: nvme*n1
+ mount_root: /nvmedata
+ data_disk_count: 0
+ disk_sku: Standard_LRS
+ data_disk_size_gb: 1024
+ capacity: 4
+ roles:
+ worker: 4
diff --git a/conf/muchos.props.example b/conf/muchos.props.example
index 54fcefb..4af9e70 100644
--- a/conf/muchos.props.example
+++ b/conf/muchos.props.example
@@ -114,6 +114,8 @@ vnet_cidr = 10.0.0.0/8
subnet = subnet1
# The CIDR prefix used for the single subnet within the virtual network.
subnet_cidr = 10.1.0.0/16
+# Optional. If set to True, will create multiple VMSS based on
azure_multiple_vmss_vars.yml
+use_multiple_vmss = False
# Azure image reference defined as a pipe-delimited string in the format
offer|publisher|sku|version|
# Please refer 'Launching an Azure cluster' section of the README before
making changes
azure_image_reference = CentOS|OpenLogic|7.5|latest|
diff --git a/docs/azure-multiple-vmss.md b/docs/azure-multiple-vmss.md
new file mode 100644
index 0000000..082281f
--- /dev/null
+++ b/docs/azure-multiple-vmss.md
@@ -0,0 +1,34 @@
+# Azure based clusters using multiple Virtual Machine Scale Sets (VMSS)
+By default, Azure based deployments of Accumulo clusters provision a single
[Virtual Machine Scale Set -
VMSS](https://docs.microsoft.com/en-us/azure/virtual-machine-scale-sets/overview).
A VMSS consists of a set of Virtual Machine instances, which are individually
identified by their hostname and private IP address.
+
+## Challenges with a single VMSS deployment
+1. All VM instances in a single VMSS by default are of the same size (CPU, RAM
and disks). This can be a constraint when provisioning larger clusters, wherein
the user might require different resource sizes for leader nodes as compared to
worker nodes.
+1. It may also be required to use different disk types (SSD / HDD / NVME) for
different sets of nodes in the same Muchos cluster. This is not possible when
using a single VMSS deployment.
+1. The `muchos launch` command automatically populates the `nodes` section in
`muchos.props` with these hostnames and IP addresses based on the details of
the VM instances in the VMSS. In the case of a single VMSS deployment,
hard-coded assignment of a minimum (but sufficient) set of roles, to these
nodes is done. As a result, deploying additional roles, such as Fluo, or Spark,
is not possible unless the user manually edits the `muchos.props` file after
the `muchos launch` command, and p [...]
+1. Also, in certain cases, it may be necessary to spawn multiple VMSS
deployments, to overcome
[limits](https://docs.microsoft.com/en-us/azure/azure-resource-manager/management/azure-subscription-service-limits#virtual-machine-scale-sets-limits)
such as the maximum number of VMs in a single VMSS. For example, attempting to
launch a 2000-node Azure cluster through Muchos would not work if deploying
using a single VMSS, as the current limit for VMSS is 1000 VMs in a single VMSS.
+1. Finally, it may be required to assign different perf profiles to different
sets of VMs in the cluster. For example, larger nodes will typically have
larger JVM heap sizes / YARN memory configured as compared to smaller nodes.
+
+## Multiple VMSS deployment
+To address the above challenges, Muchos supports a "multiple VMSS" mode of
installation for Azure clusters. To use this mode, the user needs to:
+1. Set `use_multiple_vmss = True` in `muchos.props`
+1. Create an appropriate `azure_multiple_vmss_vars.yml` file in the
`fluo-muchos/conf` folder
+
+In such a case, the `muchos launch` command will create multiple VMSS
deployments in parallel, and later assign roles to the VM instances within each
VMSS, based on the specification in the `azure_multiple_vmss_vars.yml` file.
Subsequently, `muchos setup` runs without any modifications.
+
+## Format of the azure_multiple_vmss_vars.yml file
+Muchos provides a [sample file](../conf/azure_multiple_vmss_vars.yml.example)
which can be used as a template to customize. The YAML file is a list of VMSS
specifications. The following fields can be specified for each VMSS:
+
+| Attribute | Required or optional? | Default value | Description |
+|-----------|------------------------|---------|-------------|
+| `name_suffix` | Required | - | The name of each VMSS is constructed by
concatenating the Muchos cluster name with this string. As an example, if your
Muchos cluster is called `test`, and this field has a value of `ldr`, then the
VMSS is created with a name `test-ldr`|
+| `sku` | Required | - | A string identifier specifying the Azure VM size.
Refer to the [Azure
documentation](https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series)
to lookup these strings. An example VM size is `Standard_D32s_v3` for a
32-vCPU
[Dsv3](https://docs.microsoft.com/en-us/azure/virtual-machines/dv3-dsv3-series#dsv3-series)
VM|
+| `perf_profile` | Required | - | A string identifying a corresponding
performance profile configuration section in muchos.props which contains perf
profile parameters |
+| `azure_disk_device_path`| Optional | If not specified, the corresponding
`azure_disk_device_path` value from the `azure` section in
[muchos.props](../conf/muchos.props.example) is used | This is a device path
used to enumerate attached SCSI or NVME disks to use for persistent local
storage |
+| `azure_disk_device_pattern`| Optional | If not specified, the corresponding
`azure_disk_device_pattern` value from the `azure` section in
[muchos.props](../conf/muchos.props.example) is used | This is a device name
wildcard pattern used (internally) in conjunction with `azure_disk_device_path`
to enumerate attached SCSI or NVME disks to use for persistent local storage |
+| `mount_root`| Optional | If not specified, the corresponding `mount_root`
value from the `azure` section in [muchos.props](../conf/muchos.props.example)
is used | This is the folder in the file system where the persistent disks are
mounted |
+| `data_disk_count`| Required | - | An integer value which specifies the
number of persistent (managed) data disks to be attached to each VM in the
VMSS. It can be 0 in specific cases - see [notes on using ephemeral
storage](./azure-ephemeral-disks.md) for details |
+| `disk_sku`| Required | - | Can be either Standard_LRS (for HDD) or
Premium_LRS (for Premium SSD). At this time, we have not tested the use of
Standard SSD or UltraSSD with Muchos |
+| `data_disk_size_gb`| Required | - | An integer value specifying the size of
each persistent (managed) data disk in GiB |
+| `image_reference`| Optional | If not specified, the corresponding
`azure_image_reference` value from the `azure` section in
[muchos.props](../conf/muchos.props.example) is used | Azure image reference
defined as a pipe-delimited string.
+| `capacity`| Required | - | An integer value specifying the number of VMs in
this specific VMSS |
+| `roles`| Required | - | This is a dictionary (list of key-value pairs), each
of which should be of the form `muchos_role_name`: `integer count`. See [sample
file](../conf/azure_multiple_vmss_vars.yml.example) for examples. The `muchos
launch` command for Azure clusters uses this list to assign roles to hosts in a
sequential fashion. For example, if a given VMSS has 3 `zkfc` role members and
2 `namenode` role members defined, host0 and host1 in the VMSS will be assigned
both `zkfc` and [...]
diff --git a/lib/muchos/azure.py b/lib/muchos/azure.py
index b7ae53e..a12bede 100644
--- a/lib/muchos/azure.py
+++ b/lib/muchos/azure.py
@@ -128,3 +128,139 @@ class VmssCluster(ExistingCluster):
if v.lower() in ("false", "no"):
return False
return v
+
+ # For Azure clusters this method creates Ansible group variables which
+ # allow overriding the "global" host or play variables with group specific
+ # variables. Because Ansible group variables override host variables this
+ # is a very powerful feature to support per-group specialization of
+ # configuration. Currently this is used to define the following:
+ #
+ # 1. Variables for different perf profiles for different groups of hosts
+ # This capability allows specifying different settings for clusters
+ # which have heterogenous hardware - RAM especially
+ #
+ # 2. Different mount roots for different sets of hosts, with a fallback to
+ # using the global mount_root defined in the Ansible hosts file
+ #
+ # 3. Different worker_data_dirs and default_data_dirs for specific groups
+ # of hosts.
+ #
+ # 4. Different Azure disk path and disk name pattern for specific groups
+ # of hosts.
+ def add_specialized_configs(self, hosts_file):
+ if self.config.use_multiple_vmss():
+ vmss_hosts = open(
+ path.join(
+ self.config.deploy_path,
+ "conf/azure_vmss_to_hosts.conf"
+ ),
+ "r",
+ )
+ print("\n", file=hosts_file)
+ for line in vmss_hosts:
+ print(line.rstrip("\n"), file=hosts_file)
+
+ for curr_vmss in self.config.azure_multiple_vmss_vars["vars_list"]:
+ vmss_group_name = (
+ self.config.cluster_name + "-" + curr_vmss["name_suffix"]
+ )
+ profile = curr_vmss["perf_profile"]
+
+ with open(
+ path.join(
+ self.config.deploy_path,
+ "ansible/group_vars/"
+ + vmss_group_name.replace("-", "_"),
+ ),
+ "w",
+ ) as vmss_file:
+ for (name, value) in self.config.items(profile):
+ print("{0}: {1}".format(name, value), file=vmss_file)
+
+ # use VMSS-specific mount root if one is defined or
+ # the global mount root if there is no VMSS-specific value
+ curr_mount_root = curr_vmss.get(
+ "mount_root", self.config.mount_root()
+ )
+
+ # write the mount root out to the per-VMSS group vars
+ print(
+ "{0}: {1}".format("mount_root", curr_mount_root),
+ file=vmss_file,
+ )
+
+ # also include per-VMSS worker_data_dirs
+ curr_worker_dirs = self.config.data_dirs_internal(
+ "worker",
+ curr_vmss["data_disk_count"],
+ curr_mount_root,
+ curr_vmss["sku"],
+ )
+
+ print(
+ "{0}: {1}".format(
+ "worker_data_dirs", curr_worker_dirs,
+ ),
+ file=vmss_file,
+ )
+
+ # per-VMSS default_data_dirs
+ curr_default_dirs = self.config.data_dirs_internal(
+ "default",
+ curr_vmss["data_disk_count"],
+ curr_mount_root,
+ curr_vmss["sku"],
+ )
+
+ print(
+ "{0}: {1}".format(
+ "default_data_dirs", curr_default_dirs,
+ ),
+ file=vmss_file,
+ )
+
+ # also write out per-VMSS disk path and pattern
+ # using the global value from muchos.props as default
+ # if the VMSS does not define a custom value
+ print(
+ "{0}: {1}".format(
+ "azure_disk_device_path",
+ curr_vmss.get(
+ "azure_disk_device_path",
+ self.config.azure_disk_device_path(),
+ ),
+ ),
+ file=vmss_file,
+ )
+
+ print(
+ "{0}: {1}".format(
+ "azure_disk_device_pattern",
+ curr_vmss.get(
+ "azure_disk_device_pattern",
+ self.config.azure_disk_device_pattern(),
+ ),
+ ),
+ file=vmss_file,
+ )
+
+ # these nested loops are a tight (if slightly less
+ # readable) way of creating the various directory ordinals
+ for dirtype in ["default", "worker"]:
+ for ordinal in range(3):
+ print(
+ "{0}: {1}".format(
+ "{0}dir_ordinal{1}".format(
+ dirtype, ordinal
+ ),
+ 0
+ if len(
+ curr_default_dirs
+ if dirtype == "default"
+ else curr_worker_dirs
+ )
+ < ordinal + 1
+ else ordinal,
+ ),
+ file=vmss_file,
+ )
diff --git a/lib/muchos/config/azure.py b/lib/muchos/config/azure.py
index ddc3f89..4a77b02 100644
--- a/lib/muchos/config/azure.py
+++ b/lib/muchos/config/azure.py
@@ -19,6 +19,7 @@ from sys import exit
from .base import BaseConfig
from .decorators import ansible_host_var, is_valid, default
from .validators import is_type, is_in
+from yaml import load, FullLoader
class AzureDeployConfig(BaseConfig):
@@ -40,6 +41,15 @@ class AzureDeployConfig(BaseConfig):
cluster_name,
)
+ # load azure_multiple_vmss_vars.yml
+ if self.use_multiple_vmss():
+ with open(
+ "conf/azure_multiple_vmss_vars.yml"
+ ) as azure_multiple_vmss_vars_file:
+ self.azure_multiple_vmss_vars = load(
+ azure_multiple_vmss_vars_file.read(), Loader=FullLoader
+ )
+
def verify_config(self, action):
self._verify_config(action)
@@ -189,3 +199,9 @@ class AzureDeployConfig(BaseConfig):
@default(None)
def instance_volumes_adls(self):
return self.get("azure", "instance_volumes_adls")
+
+ @ansible_host_var
+ @default(False)
+ @is_valid(is_in([True, False]))
+ def use_multiple_vmss(self):
+ return self.getboolean("azure", "use_multiple_vmss")