[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:migrate-lp-to-github into cloud-init:master

2020-07-15 Thread Anh Vo (MSFT)
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:migrate-lp-to-github into 
cloud-init:master.

Commit message:
lp-to-git-users: adding anhvoms

Mapped from vtqanh


Requested reviews:
  cloud-init Commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/387447
-- 
Your team cloud-init Commiters is requested to review the proposed merge of 
~vtqanh/cloud-init:migrate-lp-to-github into cloud-init:master.
diff --git a/tools/.lp-to-git-user b/tools/.lp-to-git-user
index 32cc1fa..89422db 100644
--- a/tools/.lp-to-git-user
+++ b/tools/.lp-to-git-user
@@ -28,5 +28,6 @@
  "rjschwei": "rjschwei",
  "tribaal": "chrisglass",
  "trstringer": "trstringer",
+ "vtqanh": "anhvoms",
  "xiaofengw": "xiaofengw-vmware"
 }
\ No newline at end of file
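
The mapping file itself is a small JSON object keyed by Launchpad username.
As a minimal sketch of how such a mapping could be consumed (illustrative
only; this is not the actual migration tooling in tools/):

    import json

    # Look up the GitHub username recorded for a Launchpad user.
    with open("tools/.lp-to-git-user") as f:
        lp_to_git = json.load(f)

    print(lp_to_git.get("vtqanh"))  # -> "anhvoms" after this merge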


[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:UpdateReporterForAnalyze into cloud-init:master

2019-07-15 Thread Anh Vo (MSFT)
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:UpdateReporterForAnalyze 
into cloud-init:master.

Commit message:
analyze: fix poor formatting due to additional datasource events

DataSourceAzure is emitting additional events into the cloud-init
log, which causes the analyze module to produce somewhat confusing
output. This is due to two issues: 1) DataSourceAzure does not emit
the stage (e.g., init-local), while analyze expects to see it in the
event output. 2) analyze does not correctly process nested stages.
This change saves the stage name into the reporting module, so that
other reporters can use it to know which stage they are in, and fixes
the analyze module to process multiple levels of nested events.
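
For context, analyze tells stages and substages apart purely by the event
name: reporting events carry '/'-separated names, where a bare name such as
"init-local" is a stage and a name below it is a substage. A minimal sketch
of that convention (the event dicts and the substage name are simplified
examples; event_parent's split is an assumption mirroring the context
shown in the diff):

    def event_name(event):
        # full '/'-separated name of the event
        return event.get('name')

    def event_parent(event):
        # top-level stage component of the name
        if event:
            return event_name(event).split('/')[0]
        return None

    def event_is_stage(event):
        # a stage name contains no '/'; substages live beneath a stage
        return '/' not in event_name(event)

    assert event_is_stage({'name': 'init-local'})
    assert not event_is_stage({'name': 'init-local/check-cache'})
    assert event_parent({'name': 'init-local/check-cache'}) == 'init-local'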

Requested reviews:
  cloud-init commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/370156
-- 
Your team cloud-init commiters is requested to review the proposed merge of 
~vtqanh/cloud-init:UpdateReporterForAnalyze into cloud-init:master.
diff --git a/cloudinit/analyze/show.py b/cloudinit/analyze/show.py
index 3e778b8..b15cd2c 100644
--- a/cloudinit/analyze/show.py
+++ b/cloudinit/analyze/show.py
@@ -94,6 +94,10 @@ def event_parent(event):
     return None
 
 
+def event_is_stage(event):
+    return '/' not in event_name(event)
+
+
 def event_timestamp(event):
     return float(event.get('timestamp'))
 
@@ -146,7 +150,9 @@ def generate_records(events, blame_sort=False,
             next_evt = None
 
         if event_type(event) == 'start':
-            if event.get('name') in stages_seen:
+            stage_name = event_parent(event)
+            if stage_name == event_name(event) and stage_name in stages_seen:
+                # new boot record
                 records.append(total_time_record(total_time))
                 boot_records.append(records)
                 records = []
@@ -166,19 +172,26 @@ def generate_records(events, blame_sort=False,
                                                          event,
                                                          next_evt)))
             else:
-                # This is a parent event
-                records.append("Starting stage: %s" % event.get('name'))
-                unprocessed.append(event)
-                stages_seen.append(event.get('name'))
-                continue
+                if event_is_stage(event):
+                    records.append("Starting stage: %s" % event.get('name'))
+                    unprocessed.append(event)
+                    stages_seen.append(event.get('name'))
+                else:
+                    # Start of a substage event
+                    records.append(format_record(print_format,
+                                                 event_record(start_time,
+                                                              event,
+                                                              next_evt)))
+
         else:
             prev_evt = unprocessed.pop()
             if event_name(event) == event_name(prev_evt):
-                record = event_record(start_time, prev_evt, event)
-                records.append(format_record("Finished stage: "
-                                             "(%n) %d seconds ",
-                                             record) + "\n")
-                total_time += record.get('delta')
+                if event_is_stage(event):
+                    record = event_record(start_time, prev_evt, event)
+                    records.append(format_record("Finished stage: "
+                                                 "(%n) %d seconds ",
+                                                 record) + "\n")
+                    total_time += record.get('delta')
             else:
                 # not a match, put it back
                 unprocessed.append(prev_evt)
diff --git a/cloudinit/cmd/main.py b/cloudinit/cmd/main.py
index a5446da..bcac69e 100644
--- a/cloudinit/cmd/main.py
+++ b/cloudinit/cmd/main.py
@@ -885,7 +885,7 @@ def main(sysv_args=None):
         report_on = False
 
     args.reporter = events.ReportEventStack(
-        rname, rdesc, reporting_enabled=report_on)
+        rname, rdesc, reporting_enabled=report_on, global_stage=rname)
 
     with args.reporter:
         retval = util.log_time(
diff --git a/cloudinit/reporting/events.py b/cloudinit/reporting/events.py
index e5dfab3..2499849 100644
--- a/cloudinit/reporting/events.py
+++ b/cloudinit/reporting/events.py
@@ -28,13 +28,14 @@ class _nameset(set):
 
 
 status = _nameset(("SUCCESS", "WARN", "FAIL"))
+reporting_stage = None
 
 
 class ReportingEvent(object):
 """Encapsulation of event formatting."""
 
 def __init__(self, event_type, name, description,
- origin=DEFAULT_EVENT_ORIGIN, timestamp=None):
+ origin=DEFAULT_EVENT_ORIGIN, timestamp

[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:ProvisioningTelemetry into cloud-init:master

2019-07-05 Thread Anh Vo (MSFT)
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:ProvisioningTelemetry 
into cloud-init:master.

Commit message:
Azure: Record boot timestamps, system information, and diagnostic events

Collect and record the following information through KVP:
 + timestamps related to kernel initialization and systemd activation
   of cloud-init services (see the sketch after this list)
 + system information including cloud-init version, kernel version,
   distro version, and python version
 + diagnostic events for the most common provisioning errors, such
   as an empty DHCP lease or a corrupted ovf-env.xml
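
One of the recorded timestamps, the kernel start instant, can be anchored
from system uptime; a minimal sketch of that derivation (the helper name is
hypothetical, and the real get_boot_telemetry may collect more, e.g.
systemd activation times):

    import time

    from cloudinit import util

    def kernel_start_timestamp():
        # wall-clock "now" minus seconds since boot approximates the
        # moment the kernel started (util.uptime() reads system uptime)
        return time.time() - float(util.uptime())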

Requested reviews:
  cloud-init commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/369785
-- 
Your team cloud-init commiters is requested to review the proposed merge of 
~vtqanh/cloud-init:ProvisioningTelemetry into cloud-init:master.
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index d2fad9b..f83b27e 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -28,7 +28,10 @@ from cloudinit.reporting import events
 
 from cloudinit.sources.helpers.azure import (azure_ds_reporter,
                                              azure_ds_telemetry_reporter,
-                                             get_metadata_from_fabric)
+                                             get_metadata_from_fabric,
+                                             get_boot_telemetry,
+                                             get_system_info,
+                                             report_diagnostic_event)
 
 LOG = logging.getLogger(__name__)
 
@@ -354,7 +357,7 @@ class DataSourceAzure(sources.DataSource):
                 bname = str(pk['fingerprint'] + ".crt")
                 fp_files += [os.path.join(ddir, bname)]
                 LOG.debug("ssh authentication: "
-                          "using fingerprint from fabirc")
+                          "using fingerprint from fabric")
 
         with events.ReportEventStack(
                 name="waiting-for-ssh-public-key",
@@ -419,12 +422,17 @@ class DataSourceAzure(sources.DataSource):
                 ret = load_azure_ds_dir(cdev)
 
             except NonAzureDataSource:
+                report_diagnostic_event(
+                    "Did not find Azure data source in %s" % cdev)
                 continue
             except BrokenAzureDataSource as exc:
                 msg = 'BrokenAzureDataSource: %s' % exc
+                report_diagnostic_event(msg)
                 raise sources.InvalidMetaDataException(msg)
             except util.MountFailedError:
-                LOG.warning("%s was not mountable", cdev)
+                msg = '%s was not mountable' % cdev
+                report_diagnostic_event(msg)
+                LOG.warning(msg)
                 continue
 
         perform_reprovision = reprovision or self._should_reprovision(ret)
@@ -432,6 +440,7 @@ class DataSourceAzure(sources.DataSource):
             if util.is_FreeBSD():
                 msg = "Free BSD is not supported for PPS VMs"
                 LOG.error(msg)
+                report_diagnostic_event(msg)
                 raise sources.InvalidMetaDataException(msg)
             ret = self._reprovision()
             imds_md = get_metadata_from_imds(
@@ -450,7 +459,9 @@ class DataSourceAzure(sources.DataSource):
                 break
 
         if not found:
-            raise sources.InvalidMetaDataException('No Azure metadata found')
+            msg = 'No Azure metadata found'
+            report_diagnostic_event(msg)
+            raise sources.InvalidMetaDataException(msg)
 
         if found == ddir:
             LOG.debug("using files cached in %s", ddir)
@@ -469,9 +480,13 @@ class DataSourceAzure(sources.DataSource):
             self._report_ready(lease=self._ephemeral_dhcp_ctx.lease)
             self._ephemeral_dhcp_ctx.clean_network()  # Teardown ephemeral
         else:
-            with EphemeralDHCPv4() as lease:
-                self._report_ready(lease=lease)
-
+            try:
+                with EphemeralDHCPv4() as lease:
+                    self._report_ready(lease=lease)
+            except Exception as e:
+                report_diagnostic_event(
+                    "exception while reporting ready: %s" % e)
+                raise
         return crawled_data
 
     def _is_platform_viable(self):
@@ -493,6 +508,16 @@ class DataSourceAzure(sources.DataSource):
         if not self._is_platform_viable():
             return False
         try:
+            get_boot_telemetry(self.distro)
+        except Exception as e:
+            LOG.warning("Failed to get boot telemetry: %s", e)
+
+        try:
+            get_system_info()
+        except Exception as e:

[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:adjustIMDSTimeout into cloud-init:master

2019-05-07 Thread Anh Vo (MSFT)
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:adjustIMDSTimeout into 
cloud-init:master.

Commit message:
DataSourceAzure: Adjust timeout for polling IMDS

If the IMDS primary server is not available, falling back to the
secondary server takes about 1s, so the expected end-to-end request
time is slightly more than 1s. With a 1s timeout, each attempt is
therefore aborted just before the secondary server can respond, and
the retry loop never completes. This change increases the timeout to
2s to prevent that infinite loop of timeouts.
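
To make the failure mode concrete: a poll attempt needs roughly the
fallback time (~1s) plus the secondary server's response time, so a 1s
timeout expires just before an answer can arrive. A small illustrative
check (the numbers are the approximations from this commit message, not
measurements):

    FALLBACK_SECONDS = 1.0   # primary -> secondary failover cost (approx.)
    RESPONSE_SECONDS = 0.2   # hypothetical secondary response time

    def attempt_succeeds(timeout):
        # an attempt succeeds only if the timeout outlasts fallback + reply
        return timeout > FALLBACK_SECONDS + RESPONSE_SECONDS

    assert not attempt_succeeds(1)  # old timeout: every retry times out
    assert attempt_succeeds(2)      # new IMDS_TIMEOUT_IN_SECONDS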

Requested reviews:
  cloud-init commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/367082
-- 
Your team cloud-init commiters is requested to review the proposed merge of 
~vtqanh/cloud-init:adjustIMDSTimeout into cloud-init:master.
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 6416525..b7440c1 100755
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -57,7 +57,12 @@ AZURE_CHASSIS_ASSET_TAG = '7783-7084-3265-9085-8269-3286-77'
 REPROVISION_MARKER_FILE = "/var/lib/cloud/data/poll_imds"
 REPORTED_READY_MARKER_FILE = "/var/lib/cloud/data/reported_ready"
 AGENT_SEED_DIR = '/var/lib/waagent'
+
+# In the event where the IMDS primary server is not
+# available, it takes 1s to fallback to the secondary one
+IMDS_TIMEOUT_IN_SECONDS = 2
 IMDS_URL = "http://169.254.169.254/metadata/"
+
 PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0"
 
 # List of static scripts and network config artifacts created by
@@ -582,9 +587,9 @@ class DataSourceAzure(sources.DataSource):
                         return
                     self._ephemeral_dhcp_ctx.clean_network()
                 else:
-                    return readurl(url, timeout=1, headers=headers,
-                                   exception_cb=exc_cb, infinite=True,
-                                   log_req_resp=False).contents
+                    return readurl(url, timeout=IMDS_TIMEOUT_IN_SECONDS,
+                                   headers=headers, exception_cb=exc_cb,
+                                   infinite=True, log_req_resp=False).contents
             except UrlError:
                 # Teardown our EphemeralDHCPv4 context on failure as we retry
                 self._ephemeral_dhcp_ctx.clean_network()
@@ -1291,8 +1296,8 @@ def _get_metadata_from_imds(retries):
 headers = {"Metadata": "true"}
 try:
 response = readurl(
-url, timeout=1, headers=headers, retries=retries,
-exception_cb=retry_on_url_exc)
+url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers,
+retries=retries, exception_cb=retry_on_url_exc)
 except Exception as e:
 LOG.debug('Ignoring IMDS instance metadata: %s', e)
 return {}
diff --git a/tests/unittests/test_datasource/test_azure.py b/tests/unittests/test_datasource/test_azure.py
index ab77c03..427ab7e 100644
--- a/tests/unittests/test_datasource/test_azure.py
+++ b/tests/unittests/test_datasource/test_azure.py
@@ -163,7 +163,8 @@ class TestGetMetadataFromIMDS(HttprettyTestCase):
 
         m_readurl.assert_called_with(
             self.network_md_url, exception_cb=mock.ANY,
-            headers={'Metadata': 'true'}, retries=2, timeout=1)
+            headers={'Metadata': 'true'}, retries=2,
+            timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS)
 
     @mock.patch('cloudinit.url_helper.time.sleep')
     @mock.patch(MOCKPATH + 'net.is_up')
@@ -1791,7 +1792,8 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
                       headers={'Metadata': 'true',
                                'User-Agent':
                                'Cloud-Init/%s' % vs()
-                               }, method='GET', timeout=1,
+                               }, method='GET',
+                               timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS,
                       url=full_url)])
         self.assertEqual(m_dhcp.call_count, 2)
         m_net.assert_any_call(
@@ -1828,7 +1830,9 @@ class TestAzureDataSourcePreprovisioning(CiTestCase):
                       headers={'Metadata': 'true',
                                'User-Agent':
                                'Cloud-Init/%s' % vs()},
-                      method='GET', timeout=1, url=full_url)])
+                      method='GET',
+                      timeout=dsaz.IMDS_TIMEOUT_IN_SECONDS,
+                      url=full_url)])
         self.assertEqual(m_dhcp.call_count, 2)
         m_net.assert_any_call(
             broadcast='192.168.2.255', interface='eth9', ip='192.168.2.9',


[Cloud-init-dev] [Merge] ~vtqanh/cloud-init:ImproveHyperVKvpReporter into cloud-init:master

2019-04-14 Thread Anh Vo (MSFT)
Anh Vo (MSFT) has proposed merging ~vtqanh/cloud-init:ImproveHyperVKvpReporter 
into cloud-init:master.

Commit message:
Azure: Changes to the Hyper-V KVP Reporter
 + Truncate the KVP pool file to prevent stale entries from being
   processed by the Hyper-V KVP reporter (see the sketch after this
   list).
 + No longer update previous entries in the KVP pool, as this is not
   desirable and potentially has a negative impact on performance.
 + Batch the appending of existing KVP entries to reduce the
   performance impact.
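
The stale-entry detection below compares the pool file's mtime against the
boot instant: anything written before this boot belongs to a previous boot
and can be discarded. A condensed sketch of that check (mirroring the
_truncate_guest_pool_file logic in the diff):

    import os
    import time

    from cloudinit import util

    def truncate_if_stale(kvp_file):
        # wall-clock boot instant = now minus seconds since boot
        boot_time = time.time() - float(util.uptime())
        if os.path.getmtime(kvp_file) < boot_time:
            # the file predates this boot, so its entries are stale
            open(kvp_file, "w").close()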

Requested reviews:
  cloud-init commiters (cloud-init-dev)

For more details, see:
https://code.launchpad.net/~vtqanh/cloud-init/+git/cloud-init/+merge/366044
-- 
Your team cloud-init commiters is requested to review the proposed merge of 
~vtqanh/cloud-init:ImproveHyperVKvpReporter into cloud-init:master.
diff --git a/cloudinit/reporting/handlers.py b/cloudinit/reporting/handlers.py
old mode 100644
new mode 100755
index 6d23558..7cca47e
--- a/cloudinit/reporting/handlers.py
+++ b/cloudinit/reporting/handlers.py
@@ -5,7 +5,6 @@ import fcntl
 import json
 import six
 import os
-import re
 import struct
 import threading
 import time
@@ -14,6 +13,7 @@ from cloudinit import log as logging
 from cloudinit.registry import DictRegistry
 from cloudinit import (url_helper, util)
 from datetime import datetime
+from six.moves.queue import Empty as QueueEmptyError
 
 if six.PY2:
     from multiprocessing.queues import JoinableQueue as JQueue
@@ -129,24 +129,46 @@ class HyperVKvpReportingHandler(ReportingHandler):
     DESC_IDX_KEY = 'msg_i'
     JSON_SEPARATORS = (',', ':')
     KVP_POOL_FILE_GUEST = '/var/lib/hyperv/.kvp_pool_1'
+    _already_truncated_pool_file = False
 
     def __init__(self,
                  kvp_file_path=KVP_POOL_FILE_GUEST,
                  event_types=None):
         super(HyperVKvpReportingHandler, self).__init__()
         self._kvp_file_path = kvp_file_path
+        HyperVKvpReportingHandler._truncate_guest_pool_file(
+            self._kvp_file_path)
+
         self._event_types = event_types
         self.q = JQueue()
-        self.kvp_file = None
         self.incarnation_no = self._get_incarnation_no()
         self.event_key_prefix = u"{0}|{1}".format(self.EVENT_PREFIX,
                                                   self.incarnation_no)
-        self._current_offset = 0
         self.publish_thread = threading.Thread(
             target=self._publish_event_routine)
         self.publish_thread.daemon = True
         self.publish_thread.start()
 
+    @classmethod
+    def _truncate_guest_pool_file(cls, kvp_file):
+        """
+        Truncate the pool file if it has not been truncated since boot.
+        This should be done exactly once for the file indicated by
+        KVP_POOL_FILE_GUEST constant above. This method takes a filename
+        so that we can use an arbitrary file during unit testing.
+        """
+        if cls._already_truncated_pool_file:
+            return
+        boot_time = time.time() - float(util.uptime())
+        try:
+            if os.path.getmtime(kvp_file) < boot_time:
+                with open(kvp_file, "w"):
+                    pass
+        except (OSError, IOError) as e:
+            LOG.warning("failed to truncate kvp pool file, %s", e)
+        finally:
+            cls._already_truncated_pool_file = True
+
     def _get_incarnation_no(self):
         """
         use the time passed as the incarnation number.
@@ -162,20 +184,15 @@ class HyperVKvpReportingHandler(ReportingHandler):
 
     def _iterate_kvps(self, offset):
         """iterate the kvp file from the current offset."""
-        try:
-            with open(self._kvp_file_path, 'rb+') as f:
-                self.kvp_file = f
-                fcntl.flock(f, fcntl.LOCK_EX)
-                f.seek(offset)
+        with open(self._kvp_file_path, 'rb') as f:
+            fcntl.flock(f, fcntl.LOCK_EX)
+            f.seek(offset)
+            record_data = f.read(self.HV_KVP_RECORD_SIZE)
+            while len(record_data) == self.HV_KVP_RECORD_SIZE:
+                kvp_item = self._decode_kvp_item(record_data)
+                yield kvp_item
                 record_data = f.read(self.HV_KVP_RECORD_SIZE)
-                while len(record_data) == self.HV_KVP_RECORD_SIZE:
-                    self._current_offset += self.HV_KVP_RECORD_SIZE
-                    kvp_item = self._decode_kvp_item(record_data)
-                    yield kvp_item
-                    record_data = f.read(self.HV_KVP_RECORD_SIZE)
-                fcntl.flock(f, fcntl.LOCK_UN)
-        finally:
-            self.kvp_file = None
+            fcntl.flock(f, fcntl.LOCK_UN)
 
     def _event_key(self, event):
         """
@@ -207,23 +224,13 @@ class HyperVKvpReportingHandler(ReportingHandler):
 
         return {'key': k, 'value': v}
 
-    def _update_kvp_item(self, record_data):
-        if self.kvp_file is None:
-