This is coming together. Diff comments:
> diff --git a/cloudinit/sources/DataSourceAzure.py > b/cloudinit/sources/DataSourceAzure.py > index 7007d9e..87f15b2 100644 > --- a/cloudinit/sources/DataSourceAzure.py > +++ b/cloudinit/sources/DataSourceAzure.py > @@ -252,6 +263,10 @@ class DataSourceAzure(sources.DataSource): > > dsname = 'Azure' > _negotiated = False > + _metadata_imds = sources.UNSET > + > + # Regenerate network config new_instance boot and every boot > + update_events = {'network': [EventType.BOOT_NEW_INSTANCE, > EventType.BOOT]} The prototype yaml included a version and 'when' as the attribute which accepted the list of events. Is this meant to model that? updates: policy-version: [1] # default to 1 network: when: [never, per-instance, boot, boot-change, always] watch-url: http://..../ storage: when: > > def __init__(self, sys_cfg, distro, paths): > sources.DataSource.__init__(self, sys_cfg, distro, paths) > @@ -373,41 +390,86 @@ class DataSourceAzure(sources.DataSource): > except NonAzureDataSource: > continue > except BrokenAzureDataSource as exc: > - raise exc > + msg = 'BrokenAzureDataSource: %s' % exc > + raise sources.InvalidMetaDataException(msg) > except util.MountFailedError: > LOG.warning("%s was not mountable", cdev) > continue > > if reprovision or self._should_reprovision(ret): > ret = self._reprovision() > - (md, self.userdata_raw, cfg, files) = ret > + imds_md = get_metadata_from_imds( > + self.fallback_interface, retries=3) > + (md, userdata_raw, cfg, files) = ret > self.seed = cdev > - self.metadata = util.mergemanydict([md, DEFAULT_METADATA]) > - self.cfg = util.mergemanydict([cfg, BUILTIN_CLOUD_CONFIG]) > + crawled_data.update({ > + 'cfg': cfg, > + 'files': files, > + 'metadata': util.mergemanydict( > + [md, {'imds': imds_md}]), > + 'userdata_raw': userdata_raw}) > found = cdev > > LOG.debug("found datasource in %s", cdev) > break > > if not found: > - return False > + raise sources.InvalidMetaDataException('No Azure metadata found') > > if found == ddir: > 
LOG.debug("using files cached in %s", ddir) > > seed = _get_random_seed() > if seed: > - self.metadata['random_seed'] = seed > + crawled_data['metadata']['random_seed'] = seed > + crawled_data['metadata']['instance-id'] = util.read_dmi_data( > + 'system-uuid') > + return crawled_data > + > + def is_platform_viable(self): check_platform_id() ? we have a check_instance_id() > + """Check platform environment to report if this datasource may > run.""" > + asset_tag = util.read_dmi_data('chassis-asset-tag') > + if asset_tag != AZURE_CHASSIS_ASSET_TAG: > + LOG.debug("Non-Azure DMI asset tag '%s' discovered.", asset_tag) > + return False > + return True > + > + def clear_cached_attrs(self): > + """Reset any cached class attributes to defaults.""" > + super(DataSourceAzure, self).clear_cached_attrs() > + self._metadata_imds = sources.UNSET > + Do you think we should deprecate the get_data property() so callers can be notified to change? That is, I think we want to stop using only get_data() since we're breaking this into the various stages (crawl, process, cache) right? > + def _get_data(self): > + """Crawl and process datasource metadata caching metadata as attrs. > + > + @return: True on success, False on error, invalid or disabled > + datasource. > + """ > + if not self.is_platform_viable(): > + return False > + try: > + crawled_data = util.log_time( > + logfunc=LOG.debug, msg='Crawl of metadata service', > + func=self.crawl_metadata) > + except sources.InvalidMetaDataException as e: > + LOG.warning('Could not crawl Azure metadata: %s', e) > + return False > + > + # Process crawled data and augment with various config defaults This looks like it should be a class function passing in the crawled data right? 
> + self.cfg = util.mergemanydict( > + [crawled_data['cfg'], BUILTIN_CLOUD_CONFIG]) > + self._metadata_imds = crawled_data['metadata']['imds'] > + self.metadata = util.mergemanydict( > + [crawled_data['metadata'], DEFAULT_METADATA]) > + self.userdata_raw = crawled_data['userdata_raw'] > > user_ds_cfg = util.get_cfg_by_path(self.cfg, DS_CFG_PATH, {}) > self.ds_cfg = util.mergemanydict([user_ds_cfg, self.ds_cfg]) > > # walinux agent writes files world readable, but expects > # the directory to be protected. > - write_files(ddir, files, dirmode=0o700) > - > - self.metadata['instance-id'] = util.read_dmi_data('system-uuid') > - > + write_files( > + self.ds_cfg['data_dir'], crawled_data['files'], dirmode=0o700) > return True > > def device_name_to_device(self, name): > @@ -543,22 +605,56 @@ class DataSourceAzure(sources.DataSource): > @property > def network_config(self): > """Generate a network config like net.generate_fallback_network() > with > - the following execptions. > + the following execeptions. heh, exceptions > > 1. Probe the drivers of the net-devices present and inject them in > the network configuration under params: driver: <driver> value > 2. Generate a fallback network config that does not include any of > the blacklisted devices. > """ > - blacklist = ['mlx4_core'] > if not self._network_config: > - LOG.debug('Azure: generating fallback configuration') > - # generate a network config, blacklist picking any mlx4_core devs > - netconfig = net.generate_fallback_config( > - blacklist_drivers=blacklist, config_driver=True) > + if self._metadata_imds != sources.UNSET and self._metadata_imds: > + netconfig = {'version': 2, 'ethernets': {}} This needs to be a function for easier testing. 
> + LOG.debug('Azure: generating network configuration from > IMDS') > + if self.distro and self.distro.name == 'ubuntu': > + remove_ubuntu_network_config_scripts() > + network_metadata = self._metadata_imds['network'] > + for idx, intf in enumerate(network_metadata['interface']): > + nicname = 'eth{idx}'.format(idx=idx) > + dev_config = {} > + for addr4 in intf['ipv4']['ipAddress']: > + privateIpv4 = addr4['privateIpAddress'] > + if privateIpv4: > + if dev_config.get('dhcp4', False): > + # Append static address config for nic > 1 > + netPrefix = intf['ipv4']['subnet'][0].get( > + 'prefix', '24') > + if not dev_config.get('addresses'): > + dev_config['addresses'] = [] > + dev_config['addresses'].append( > + '{ip}/{prefix}'.format( > + ip=privateIpv4, prefix=netPrefix)) > + else: > + dev_config['dhcp4'] = True > + for addr6 in intf['ipv6']['ipAddress']: > + privateIpv6 = addr6['privateIpAddress'] > + if privateIpv6: > + dev_config['dhcp6'] = True > + break > + if dev_config: > + mac = ':'.join(re.findall(r'..', intf['macAddress'])) > + dev_config.update( > + {'match': {'macaddress': mac.lower()}, > + 'set-name': nicname}) > + netconfig['ethernets'][nicname] = dev_config > + else: > + blacklist = ['mlx4_core'] > + LOG.debug('Azure: generating fallback configuration') > + # generate a network config, blacklist picking mlx4_core devs > + netconfig = net.generate_fallback_config( > + blacklist_drivers=blacklist, config_driver=True) > > self._network_config = netconfig > - > return self._network_config > > > @@ -1025,6 +1121,89 @@ def load_azure_ds_dir(source_dir): > return (md, ud, cfg, {'ovf-env.xml': contents}) > > > +def get_metadata_from_imds(fallback_nic, retries): > + """Query Azure's network metadata service, returning a dictionary. > + > + If network is not up, setup ephemeral dhcp on fallback_nic to talk to the > + IMDS. 
For more info on IMDS: > + > https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service > + > + @param fallback_nic: String. The name of the nic which requires active > + networ in order to query IMDS. > + @param retries: The number of retries of the IMDS_URL. > + > + @return: A dict of instance metadata containing compute and network > + info. > + """ > + if net.is_up(fallback_nic): > + return util.log_time( > + logfunc=LOG.debug, > + msg='Crawl of Azure Instance Metadata Service (IMDS)', > + func=_get_metadata_from_imds, args=(retries,)) > + else: > + with EphemeralDHCPv4(fallback_nic): > + return util.log_time( > + logfunc=LOG.debug, > + msg='Crawl of Azure Instance Metadata Service (IMDS)', > + func=_get_metadata_from_imds, args=(retries,)) > + > + > +def _get_metadata_from_imds(retries): > + > + def retry_on_url_error(msg, exception): > + if isinstance(exception, UrlError) and exception.code == 404: > + return True # Continue retries > + return False # Stop retries on all other exceptions, including 404s > + > + url = IMDS_URL + "instance?api-version=2017-12-01" I see at least two api version values; can we centralize these into variables that explain what the data means or the expected format of this version? > + headers = {"Metadata": "true"} > + try: > + response = readurl( > + url, timeout=1, headers=headers, retries=retries, timeout=1 ? Is the default timeout too long? too short? > + exception_cb=retry_on_url_error) > + except Exception as e: > + LOG.debug('Ignoring IMDS instance metadata: %s', e) > + return {} > + try: > + return util.load_json(str(response)) > + except json.decoder.JSONDecodeError: > + LOG.warning( > + 'Ignoring non-json IMDS instance metadata: %s', str(response)) > + return {} > + > + > +def remove_ubuntu_network_config_scripts(paths=None): We typically prepend maybe_ since it's possible that in new images we don't need to do this.
Would it be useful to mark a flag on the ds object to indicate whether we've removed those or not? > + """Remove Azure-specific ubuntu network config for non-primary nics. > + > + @param paths: List of networking scripts or directories to remove when > + present. > + > + In certain supported ubuntu images, static udev rules or netplan yaml > + config is delivered in the base ubuntu image to support dhcp on any > + additional interfaces which get attached by a customer at some point > + after initial boot. Since the Azure datasource can now regenerate > + network configuration as metadata reports these new devices, we no longer > + want the udev rules or netplan's 90-azure-hotplug.yaml to configure > + networking on eth1 or greater as it might collide with cloud-init's > + configuration. > + > + Remove the any existing extended network scripts if the datasource is > + enabled to write network per-boot. > + """ > + if not paths: > + paths = UBUNTU_EXTENDED_NETWORK_SCRIPTS > + LOG.debug( > + 'Removing Ubuntu extended network scripts because cloud-init updates' > + ' Azure network configuration on the following event: %s.', > + EventType.BOOT) > + for path in paths: > + if os.path.exists(path): > + if os.path.isdir(path): > + util.del_dir(path) > + else: > + util.del_file(path) > + > + > class BrokenAzureDataSource(Exception): > pass > > diff --git a/tests/unittests/test_datasource/test_azure.py > b/tests/unittests/test_datasource/test_azure.py > index e82716e..fbc9d2d 100644 > --- a/tests/unittests/test_datasource/test_azure.py > +++ b/tests/unittests/test_datasource/test_azure.py > @@ -77,9 +83,110 @@ def construct_valid_ovf_env(data=None, pubkeys=None, > return content > > > +NETWORK_METADATA = { What URL path and api-version is this? 
> + "network": { > + "interface": [ > + { > + "macAddress": "000D3A047598", > + "ipv6": { > + "ipAddress": [] > + }, > + "ipv4": { > + "subnet": [ > + { > + "prefix": "24", > + "address": "10.0.0.0" > + } > + ], > + "ipAddress": [ > + { > + "privateIpAddress": "10.0.0.4", > + "publicIpAddress": "104.46.124.81" > + } > + ] > + } > + } > + ] > + } > +} > + > + > +class TestGetMetadataFromIMDS(HttprettyTestCase): > + > + with_logs = True > + > + def setUp(self): > + super(TestGetMetadataFromIMDS, self).setUp() > + self.network_md_url = dsaz.IMDS_URL + > "instance?api-version=2017-12-01" > + > + @mock.patch('cloudinit.sources.DataSourceAzure.readurl') > + @mock.patch('cloudinit.sources.DataSourceAzure.EphemeralDHCPv4') > + @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up') > + def test_get_metadata_does_not_dhcp_if_network_is_up( > + self, m_net_is_up, m_dhcp, m_readurl): > + """Do not perform DHCP setup when nic is already up.""" > + m_net_is_up.return_value = True > + m_readurl.return_value = url_helper.StringResponse( > + json.dumps(NETWORK_METADATA).encode('utf-8')) > + self.assertEqual( > + NETWORK_METADATA, > + dsaz.get_metadata_from_imds('eth9', retries=3)) > + > + m_net_is_up.assert_called_with('eth9') > + m_dhcp.assert_not_called() > + self.assertIn( > + "Crawl of Azure Instance Metadata Service (IMDS) took", # > log_time > + self.logs.getvalue()) > + > + @mock.patch('cloudinit.sources.DataSourceAzure.readurl') > + @mock.patch('cloudinit.sources.DataSourceAzure.EphemeralDHCPv4') > + @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up') > + def test_get_metadata_performs_dhcp_when_network_is_down( > + self, m_net_is_up, m_dhcp, m_readurl): > + """Do not perform DHCP setup when nic is already up.""" comment doesn't match test-case. 
> + m_net_is_up.return_value = False > + m_readurl.return_value = url_helper.StringResponse( > + json.dumps(NETWORK_METADATA).encode('utf-8')) > + > + self.assertEqual( > + NETWORK_METADATA, > + dsaz.get_metadata_from_imds('eth9', retries=2)) > + > + m_net_is_up.assert_called_with('eth9') > + m_dhcp.assert_called_with('eth9') > + self.assertIn( > + "Crawl of Azure Instance Metadata Service (IMDS) took", # > log_time > + self.logs.getvalue()) > + > + m_readurl.assert_called_with( > + self.network_md_url, exception_cb=mock.ANY, > + headers={'Metadata': 'true'}, retries=2, timeout=1) > + > + @mock.patch('cloudinit.url_helper.time.sleep') > + @mock.patch('cloudinit.sources.DataSourceAzure.net.is_up') > + def test_get_metadata_from_imds_empty_when_no_imds_present( > + self, m_net_is_up, m_sleep): > + """Return empty dict when IMDS network metadata is absent.""" > + httpretty.register_uri( > + httpretty.GET, > + dsaz.IMDS_URL + 'instance?api-version=2017-12-01', > + body={}, status=404) > + > + m_net_is_up.return_value = True # skips dhcp > + > + self.assertEqual({}, dsaz.get_metadata_from_imds('eth9', retries=2)) > + > + m_net_is_up.assert_called_with('eth9') > + self.assertEqual([mock.call(1), mock.call(1)], > m_sleep.call_args_list) > + self.assertIn( > + "Crawl of Azure Instance Metadata Service (IMDS) took", # > log_time > + self.logs.getvalue()) > + > + > class TestAzureDataSource(CiTestCase): > > with_logs = True > + maxDiff = None Should this go in CiTestCase by default? 
> > def setUp(self): > super(TestAzureDataSource, self).setUp() > @@ -1188,6 +1430,24 @@ class TestCanDevBeReformatted(CiTestCase): > "(datasource.Azure.never_destroy_ntfs)", msg) > > > +class TestClearCachedData(CiTestCase): > + > + def test_clear_cached_attrs_clears_imds(self): > + """All class attributes are reset to defaults, including imds > data.""" > + tmp = self.tmp_dir() > + paths = helpers.Paths( > + {'cloud_dir': tmp, 'run_dir': tmp}) > + dsrc = dsaz.DataSourceAzure({}, distro=None, paths=paths) > + dsrc.metadata = 'md' > + dsrc.userdata = 'ud' > + dsrc._metadata_imds = 'imds' > + dsrc._dirty_cache = True > + dsrc.clear_cached_attrs() > + self.assertEqual({}, dsrc.metadata) > + self.assertEqual(None, dsrc.userdata) > + self.assertEqual(UNSET, dsrc._metadata_imds) I'm not super happy that we have *three* different values as the default. I'd like to not have to know this in the unittest. Can we instantiate a new ds instance, deep_copy it, and then assert the 3 fields are equal after clear_cache() instead? > + > + > class TestAzureNetExists(CiTestCase): > > def test_azure_net_must_exist_for_legacy_objpkl(self): > @@ -1398,4 +1658,52 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): > self.assertEqual(m_net.call_count, 1) > > > +class TestRemoveUbuntuNetworkConfigScripts(CiTestCase): > + > + with_logs = True > + > + def setUp(self): > + super(TestRemoveUbuntuNetworkConfigScripts, self).setUp() > + self.tmp = self.tmp_dir() > + > + def test_remove_network_scripts_removes_both_files_and_directories(self): > + """Any files or directories in paths are removed when present.""" > + file1 = self.tmp_path('file1', dir=self.tmp) > + subdir = self.tmp_path('sub1', dir=self.tmp) > + subfile = self.tmp_path('leaf1', dir=subdir) > + write_file(file1, 'file1content') Can we instead mock os.listdir and os.path.exists?
> + write_file(subfile, 'leafcontent') > + dsaz.remove_ubuntu_network_config_scripts(paths=[subdir, file1]) > + > + for path in (file1, subdir, subfile): > + self.assertFalse(os.path.exists(path), > + 'Found unremoved: %s' % path) > + > + expected_logs = [ > + 'Removing Ubuntu extended network scripts because cloud-init' > + ' updates Azure network configuration on the following event:' > + ' System boot.', > + 'Recursively deleting %s' % subdir, > + 'Attempting to remove %s' % file1] > + for log in expected_logs: > + self.assertIn(log, self.logs.getvalue()) > + > + def > test_remove_network_scripts_only_attemts_removal_if_path_exists(self): > + """Any files or directories absent are skipped without error.""" > + dsaz.remove_ubuntu_network_config_scripts(paths=[ > + '/not/a/dir/', '/not/a/file']) > + self.assertNotIn('/not/a', self.logs.getvalue()) # No delete logs > + > + @mock.patch('cloudinit.sources.DataSourceAzure.os.path.exists') > + def test_remove_network_scripts_default_removes_stock_scripts(self, > + m_exists): > + """Azure's stock ubuntu image scripts and artifacts are removed.""" > + # Report path absent on all to avoid delete operation > + m_exists.return_value = False > + dsaz.remove_ubuntu_network_config_scripts() > + calls = m_exists.call_args_list > + for path in dsaz.UBUNTU_EXTENDED_NETWORK_SCRIPTS: > + self.assertIn(mock.call(path), calls) > + > + > # vi: ts=4 expandtab -- https://code.launchpad.net/~chad.smith/cloud-init/+git/cloud-init/+merge/348704 Your team cloud-init commiters is requested to review the proposed merge of ~chad.smith/cloud-init:feature/azure-network-on-boot into cloud-init:master. _______________________________________________ Mailing list: https://launchpad.net/~cloud-init-dev Post to : cloud-init-dev@lists.launchpad.net Unsubscribe : https://launchpad.net/~cloud-init-dev More help : https://help.launchpad.net/ListHelp