[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:migrate-lp-to-github into cloud-init:master
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:migrate-lp-to-github into cloud-init:master. Commit message: lp-to-git-users: adding anhvoms Mapped from vtqanh Requested reviews: cloud-init Commiters (cloud-init-dev) For more details, see: https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/387447 -- Your team cloud-init Commiters is requested to review the proposed merge of ~vtqanh/cloud-init:migrate-lp-to-github into cloud-init:master. diff --git a/tools/.lp-to-git-user b/tools/.lp-to-git-user index 32cc1fa..89422db 100644 --- a/tools/.lp-to-git-user +++ b/tools/.lp-to-git-user @@ -28,5 +28,6 @@ "rjschwei": "rjschwei", "tribaal": "chrisglass", "trstringer": "trstringer", + "vtqanh": "anhvoms", "xiaofengw": "xiaofengw-vmware" } \ No newline at end of file ___ Mailing list: https://launchpad.net/~cloud-init-dev Post to : cloud-init-dev@lists.launchpad.net Unsubscribe : https://launchpad.net/~cloud-init-dev More help : https://help.launchpad.net/ListHelp
[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:UpdateReporterForAnalyze into cloud-init:master
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:UpdateReporterForAnalyze into cloud-init:master. Commit message: analyze: fix poor formatting due to additional datasource events DataSourceAzure is emitting additional events into the cloud-init log which causes analyze module to produce somewhat confusing output. This is due to two issues: 1) DataSourceAzure does not emit the stage (e.g., init-local) and analyze expects to see it in the event output. 2) analyze did not correctly process nested stages. This change saves the stage name into the reporting module so that other reporter can use it to know which stage it is in and fixes the analyze module to process multiple levels of nested events. Requested reviews: cloud-init commiters (cloud-init-dev) For more details, see: https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/370156 -- Your team cloud-init commiters is requested to review the proposed merge of ~vtqanh/cloud-init:UpdateReporterForAnalyze into cloud-init:master. 
diff --git a/cloudinit/analyze/show.py b/cloudinit/analyze/show.py index 3e778b8..b15cd2c 100644 --- a/cloudinit/analyze/show.py +++ b/cloudinit/analyze/show.py @@ -94,6 +94,10 @@ def event_parent(event): return None +def event_is_stage(event): +return '/' not in event_name(event) + + def event_timestamp(event): return float(event.get('timestamp')) @@ -146,7 +150,9 @@ def generate_records(events, blame_sort=False, next_evt = None if event_type(event) == 'start': -if event.get('name') in stages_seen: +stage_name = event_parent(event) +if stage_name == event_name(event) and stage_name in stages_seen: +# new boot record records.append(total_time_record(total_time)) boot_records.append(records) records = [] @@ -166,19 +172,26 @@ def generate_records(events, blame_sort=False, event, next_evt))) else: -# This is a parent event -records.append("Starting stage: %s" % event.get('name')) -unprocessed.append(event) -stages_seen.append(event.get('name')) -continue +if event_is_stage(event): +records.append("Starting stage: %s" % event.get('name')) +unprocessed.append(event) +stages_seen.append(event.get('name')) +else: +# Start of a substage event +records.append(format_record(print_format, + event_record(start_time, + event, + next_evt))) + else: prev_evt = unprocessed.pop() if event_name(event) == event_name(prev_evt): -record = event_record(start_time, prev_evt, event) -records.append(format_record("Finished stage: " - "(%n) %d seconds ", - record) + "\n") -total_time += record.get('delta') +if event_is_stage(event): +record = event_record(start_time, prev_evt, event) +records.append(format_record("Finished stage: " + "(%n) %d seconds ", + record) + "\n") +total_time += record.get('delta') else: # not a match, put it back unprocessed.append(prev_evt) diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py index a5446da..bcac69e 100644 --- a/cloudinit/cmd/main.py +++ b/cloudinit/cmd/main.py @@ -885,7 +885,7 @@ def main(sysv_args=None): report_on = False args.reporter = 
events.ReportEventStack( -rname, rdesc, reporting_enabled=report_on) +rname, rdesc, reporting_enabled=report_on, global_stage=rname) with args.reporter: retval = util.log_time( diff --git a/cloudinit/reporting/events.py b/cloudinit/reporting/events.py index e5dfab3..2499849 100644 --- a/cloudinit/reporting/events.py +++ b/cloudinit/reporting/events.py @@ -28,13 +28,14 @@ class _nameset(set): status = _nameset(("SUCCESS", "WARN", "FAIL")) +reporting_stage = None class ReportingEvent(object): """Encapsulation of event formatting.""" def __init__(self, event_type, name, description, - origin=DEFAULT_EVENT_ORIGIN, timestamp=None): + origin=DEFAULT_EVENT_ORIGIN, timestamp
[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:ProvisioningTelemetry into cloud-init:master
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:ProvisioningTelemetry into cloud-init:master. Commit message: Azure: Record boot timestamps, system information, and diagnostic events Collect and record the following information through KVP: + timestamps related to kernel initialization and systemd activation of cloud-init services + system information including cloud-init version, kernel version, distro version, and python version + diagnostic events for the most common provisioning error issues such as empty dhcp lease, corrupted ovf-env.xml, etc. Requested reviews: cloud-init commiters (cloud-init-dev) For more details, see: https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/369785 -- Your team cloud-init commiters is requested to review the proposed merge of ~vtqanh/cloud-init:ProvisioningTelemetry into cloud-init:master. diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index d2fad9b..f83b27e 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -28,7 +28,10 @@ from cloudinit.reporting import events from cloudinit.sources.helpers.azure import (azure_ds_reporter, azure_ds_telemetry_reporter, - get_metadata_from_fabric) + get_metadata_from_fabric, + get_boot_telemetry, + get_system_info, + report_diagnostic_event) LOG = logging.getLogger(__name__) @@ -354,7 +357,7 @@ class DataSourceAzure(sources.DataSource): bname = str(pk['fingerprint'] + ".crt") fp_files += [os.path.join(ddir, bname)] LOG.debug("ssh authentication: " - "using fingerprint from fabirc") + "using fingerprint from fabric") with events.ReportEventStack( name="waiting-for-ssh-public-key", @@ -419,12 +422,17 @@ class DataSourceAzure(sources.DataSource): ret = load_azure_ds_dir(cdev) except NonAzureDataSource: +report_diagnostic_event( +"Did not find Azure data source in %s" % cdev) continue except BrokenAzureDataSource as exc: msg = 'BrokenAzureDataSource: %s' % exc +report_diagnostic_event(msg) 
raise sources.InvalidMetaDataException(msg) except util.MountFailedError: -LOG.warning("%s was not mountable", cdev) +msg = '%s was not mountable' % cdev +report_diagnostic_event(msg) +LOG.warning(msg) continue perform_reprovision = reprovision or self._should_reprovision(ret) @@ -432,6 +440,7 @@ class DataSourceAzure(sources.DataSource): if util.is_FreeBSD(): msg = "Free BSD is not supported for PPS VMs" LOG.error(msg) +report_diagnostic_event(msg) raise sources.InvalidMetaDataException(msg) ret = self._reprovision() imds_md = get_metadata_from_imds( @@ -450,7 +459,9 @@ class DataSourceAzure(sources.DataSource): break if not found: -raise sources.InvalidMetaDataException('No Azure metadata found') +msg = 'No Azure metadata found' +report_diagnostic_event(msg) +raise sources.InvalidMetaDataException(msg) if found == ddir: LOG.debug("using files cached in %s", ddir) @@ -469,9 +480,13 @@ class DataSourceAzure(sources.DataSource): self._report_ready(lease=self._ephemeral_dhcp_ctx.lease) self._ephemeral_dhcp_ctx.clean_network() # Teardown ephemeral else: -with EphemeralDHCPv4() as lease: -self._report_ready(lease=lease) - +try: +with EphemeralDHCPv4() as lease: +self._report_ready(lease=lease) +except Exception as e: +report_diagnostic_event( +"exception while reporting ready: %s" % e) +raise return crawled_data def _is_platform_viable(self): @@ -493,6 +508,16 @@ class DataSourceAzure(sources.DataSource): if not self._is_platform_viable(): return False try: +get_boot_telemetry(self.distro) +except Exception as e: +LOG.warning("Failed to get boot telemetry: %s", e) + +try: +get_system_info() +except Exception as e:
[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:adjustIMDSTimeout into cloud-init:master
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:adjustIMDSTimeout into cloud-init:master. Commit message: DataSourceAzure: Adjust timeout for polling IMDS If the IMDS primary server is not available, falling back to the secondary server takes about 1s. The net result is that the expected E2E time is slightly more than 1s. This change increases the timeout to 2s to prevent the infinite loop of timeouts. Requested reviews: cloud-init commiters (cloud-init-dev) For more details, see: https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/367082 -- Your team cloud-init commiters is requested to review the proposed merge of ~vtqanh/cloud-init:adjustIMDSTimeout into cloud-init:master. diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 6416525..b7440c1 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -57,7 +57,12 @@ AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77' REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds" REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready" AGENT_SEED_DIR = '/var/lib/waagent' + +# In the event where the IMDS primary server is not +# available, it takes 1s to fallback to the secondary one +IMDS_TIMEOUT_IN_SECONDS = 2 IMDS_URL = "http://169.254.169.254/metadata/" + PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0" # List of static scripts and network config artifacts created by @@ -582,9 +587,9 @@ class DataSourceAzure(sources.DataSource): return self._ephemeral_dhcp_ctx.clean_network() else: -return readurl(url, timeout=1, headers=headers, - exception_cb=exc_cb, infinite=True, - log_req_resp=False).contents +return readurl(url, timeout=IMDS_TIMEOUT_IN_SECONDS, + headers=headers, exception_cb=exc_cb, + infinite=True, log_req_resp=False).contents except UrlError: # Teardown our EphemeralDHCPv4 context on failure as we retry self._ephemeral_dhcp_ctx.clean_network() @@ -1291,8 +1296,8 @@ def 
_get_metadata_from_imds(retries): headers = {"Metadata": "true"} try: response = readurl( -url, timeout=1, headers=headers, retries=retries, -exception_cb=retry_on_url_exc) +url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers, +retries=retries, exception_cb=retry_on_url_exc) except Exception as e: LOG.debug('Ignoring IMDS instance metadata: %s', e) return {} diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py index ab77c03..427ab7e 100644 --- a/tests/unittests/test_datasource/test_azure.py +++ b/tests/unittests/test_datasource/test_azure.py @@ -163,7 +163,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase): m_readurl.assert_called_with( self.network_md_url, exception_cb=mock.ANY, -headers={'Metadata': 'true'}, retries=2, timeout=1) +headers={'Metadata': 'true'}, retries=2, +timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS) @mock.patch('cloudinit.url_helper.time.sleep') @mock.patch(MOCKPATH + 'net.is_up') @@ -1791,7 +1792,8 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): headers={'Metadata': 'true', 'User-Agent': 'Cloud-Init/%s' % vs() - }, method='GET', timeout=1, + }, method='GET', +timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS, url=full_url)]) self.assertEqual(m_dhcp.call_count, 2) m_net.assert_any_call( @@ -1828,7 +1830,9 @@ class TestAzureDataSourcePreprovisioning(CiTestCase): headers={'Metadata': 'true', 'User-Agent': 'Cloud-Init/%s' % vs()}, -method='GET', timeout=1, url=full_url)]) +method='GET', +timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS, +url=full_url)]) self.assertEqual(m_dhcp.call_count, 2) m_net.assert_any_call( broadcast='192.168.2.255', interface='eth9', ip='192.168.2.9', ___ Mailing list: https://launchpad.net/~cloud-init-dev Post to : cloud-init-dev@lists.launchpad.net Unsubscribe : https://launchpad.net/~cloud-init-dev More help : https://help.launchpad.net/ListHelp
[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:ImproveHyperVKvpReporter into cloud-init:master
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:ImproveHyperVKvpReporter into cloud-init:master. Commit message: Azure: Changes to the Hyper-V KVP Reporter + Truncate KVP Pool file to prevent stale entries from being processed by the Hyper-V KVP reporter. + No longer update previous entries in the KVP pool as this is not desirable and potentially has negative impact to performance. + Batch appending of existing KVP entries to reduce performance impact Requested reviews: cloud-init commiters (cloud-init-dev) For more details, see: https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/366044 -- Your team cloud-init commiters is requested to review the proposed merge of ~vtqanh/cloud-init:ImproveHyperVKvpReporter into cloud-init:master. diff --git a/cloudinit/reporting/handlers.py b/cloudinit/reporting/handlers.py old mode 100644 new mode 100755 index 6d23558..7cca47e --- a/cloudinit/reporting/handlers.py +++ b/cloudinit/reporting/handlers.py @@ -5,7 +5,6 @@ import fcntl import json import six import os -import re import struct import threading import time @@ -14,6 +13,7 @@ from cloudinit import log as logging from cloudinit.registry import DictRegistry from cloudinit import (url_helper, util) from datetime import datetime +from six.moves.queue import Empty as QueueEmptyError if six.PY2: from multiprocessing.queues import JoinableQueue as JQueue @@ -129,24 +129,46 @@ class HyperVKvpReportingHandler(ReportingHandler): DESC_IDX_KEY = 'msg_i' JSON_SEPARATORS = (',', ':') KVP_POOL_FILE_GUEST = '/var/lib/hyperv/.kvp_pool_1' +_already_truncated_pool_file = False def __init__(self, kvp_file_path=KVP_POOL_FILE_GUEST, event_types=None): super(HyperVKvpReportingHandler, self).__init__() self._kvp_file_path = kvp_file_path +HyperVKvpReportingHandler._truncate_guest_pool_file( +self._kvp_file_path) + self._event_types = event_types self.q = JQueue() -self.kvp_file = None self.incarnation_no = self._get_incarnation_no() self.event_key_prefix = 
u"{0}|{1}".format(self.EVENT_PREFIX, self.incarnation_no) -self._current_offset = 0 self.publish_thread = threading.Thread( target=self._publish_event_routine) self.publish_thread.daemon = True self.publish_thread.start() +@classmethod +def _truncate_guest_pool_file(cls, kvp_file): +""" +Truncate the pool file if it has not been truncated since boot. +This should be done exactly once for the file indicated by +KVP_POOL_FILE_GUEST constant above. This method takes a filename +so that we can use an arbitrary file during unit testing. +""" +if cls._already_truncated_pool_file: +return +boot_time = time.time() - float(util.uptime()) +try: +if os.path.getmtime(kvp_file) < boot_time: +with open(kvp_file, "w"): +pass +except (OSError, IOError) as e: +LOG.warning("failed to truncate kvp pool file, %s", e) +finally: +cls._already_truncated_pool_file = True + def _get_incarnation_no(self): """ use the time passed as the incarnation number. @@ -162,20 +184,15 @@ class HyperVKvpReportingHandler(ReportingHandler): def _iterate_kvps(self, offset): """iterate the kvp file from the current offset.""" -try: -with open(self._kvp_file_path, 'rb+') as f: -self.kvp_file = f -fcntl.flock(f, fcntl.LOCK_EX) -f.seek(offset) +with open(self._kvp_file_path, 'rb') as f: +fcntl.flock(f, fcntl.LOCK_EX) +f.seek(offset) +record_data = f.read(self.HV_KVP_RECORD_SIZE) +while len(record_data) == self.HV_KVP_RECORD_SIZE: +kvp_item = self._decode_kvp_item(record_data) +yield kvp_item record_data = f.read(self.HV_KVP_RECORD_SIZE) -while len(record_data) == self.HV_KVP_RECORD_SIZE: -self._current_offset += self.HV_KVP_RECORD_SIZE -kvp_item = self._decode_kvp_item(record_data) -yield kvp_item -record_data = f.read(self.HV_KVP_RECORD_SIZE) -fcntl.flock(f, fcntl.LOCK_UN) -finally: -self.kvp_file = None +fcntl.flock(f, fcntl.LOCK_UN) def _event_key(self, event): """ @@ -207,23 +224,13 @@ class HyperVKvpReportingHandler(ReportingHandler): return {'key': k, 'value': v} -def _update_kvp_item(self, 
record_data): -if self.kvp_file is None: -