Joshua Harlow has proposed merging lp:~harlowja/cloud-init/url-ssl-fixings into lp:cloud-init.
Requested reviews:
  cloud init development team (cloud-init-dev)
Related bugs:
  Bug #1067888 in cloud-init: "Https verification + usage + support"
  https://bugs.launchpad.net/cloud-init/+bug/1067888

For more details, see:
https://code.launchpad.net/~harlowja/cloud-init/url-ssl-fixings/+merge/149481
=== modified file 'Requires' --- Requires 2012-07-09 20:41:45 +0000 +++ Requires 2013-02-20 06:55:27 +0000 @@ -10,11 +10,6 @@ # datasource is removed, this is no longer needed oauth -# This is used to fetch the ec2 metadata into a easily -# parseable format, instead of having to have cloud-init perform -# those same fetchs and decodes and signing (...) that ec2 requires. -boto - # This is only needed for places where we need to support configs in a manner # that the built-in config parser is not sufficent (ie # when we need to preserve comments, or do not have a top-level @@ -26,3 +21,6 @@ # The new main entrypoint uses argparse instead of optparse argparse + +# Requests handles ssl correctly! +requests === modified file 'cloudinit/config/cc_phone_home.py' --- cloudinit/config/cc_phone_home.py 2012-10-28 02:25:48 +0000 +++ cloudinit/config/cc_phone_home.py 2013-02-20 06:55:27 +0000 @@ -19,7 +19,6 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. from cloudinit import templater -from cloudinit import url_helper as uhelp from cloudinit import util from cloudinit.settings import PER_INSTANCE @@ -112,7 +111,8 @@ } url = templater.render_string(url, url_params) try: - uhelp.readurl(url, data=real_submit_keys, retries=tries, sec_between=3) + util.read_file_or_url(url, data=real_submit_keys, + retries=tries, sec_between=3) except: util.logexc(log, ("Failed to post phone home data to" " %s in %s tries"), url, tries) === added file 'cloudinit/ec2_utils.py' --- cloudinit/ec2_utils.py 1970-01-01 00:00:00 +0000 +++ cloudinit/ec2_utils.py 2013-02-20 06:55:27 +0000 @@ -0,0 +1,153 @@ +# vi: ts=4 expandtab +# +# Copyright (C) 2012 Yahoo! Inc. +# +# Author: Joshua Harlow <[email protected]> +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 3, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +from urlparse import (urlparse, urlunparse) + +import json +import urllib + +from cloudinit import log as logging +from cloudinit import util + +LOG = logging.getLogger(__name__) + + +def combine_url(base, add_on): + base_parsed = list(urlparse(base)) + path = base_parsed[2] + if path and not path.endswith("/"): + path += "/" + path += urllib.quote(str(add_on), safe="/:") + base_parsed[2] = path + return urlunparse(base_parsed) + + +# See: http://bit.ly/TyoUQs +class MetadataMaterializer(object): + def __init__(self, blob, base_url): + self._blob = blob + self._md = None + self._base_url = base_url + + def _parse(self, blob): + leaves = {} + children = [] + if not blob: + return (leaves, children) + + def has_children(item): + if item.endswith("/"): + return True + else: + return False + + def get_name(item): + if item.endswith("/"): + return item.rstrip("/") + return item + + for field in blob.splitlines(): + field = field.strip() + field_name = get_name(field) + if not field or not field_name: + continue + if has_children(field): + if field_name not in children: + children.append(field_name) + else: + contents = field.split("=", 1) + resource = field_name + if len(contents) > 1: + # What a PITA... 
+ (ident, sub_contents) = contents + checked_ident = util.safe_int(ident) + if checked_ident is not None: + resource = "%s/openssh-key" % (checked_ident) + field_name = sub_contents + leaves[field_name] = resource + return (leaves, children) + + def materialize(self): + if self._md is not None: + return self._md + self._md = self._materialize(self._blob, self._base_url) + return self._md + + def _fetch_url(self, url): + response = util.read_file_or_url(url) + return str(response) + + def _decode_leaf_blob(self, blob): + if not blob: + return blob + stripped_blob = blob.strip() + if stripped_blob.startswith("{") and stripped_blob.endswith("}"): + # Assume and try with json + try: + return json.loads(blob) + except (ValueError, TypeError): + pass + if blob.find("\n") != -1: + return blob.splitlines() + return blob + + def _materialize(self, blob, base_url): + (leaves, children) = self._parse(blob) + child_contents = {} + for c in children: + child_url = combine_url(base_url, c) + if not child_url.endswith("/"): + child_url += "/" + child_blob = self._fetch_url(child_url) + child_contents[c] = self._materialize(child_blob, child_url) + leaf_contents = {} + for (field, resource) in leaves.items(): + leaf_url = combine_url(base_url, resource) + leaf_blob = self._fetch_url(leaf_url) + leaf_contents[field] = self._decode_leaf_blob(leaf_blob) + joined = {} + joined.update(child_contents) + for field in leaf_contents.keys(): + if field in joined: + LOG.warn("Duplicate key found in results from %s", base_url) + else: + joined[field] = leaf_contents[field] + return joined + + +def get_instance_userdata(url, version='latest', ssl_details=None): + ud_url = combine_url(url, version) + ud_url = combine_url(ud_url, 'user-data') + try: + response = util.read_file_or_url(ud_url) + return str(response) + except Exception: + util.logexc(LOG, "Failed fetching userdata from url %s", ud_url) + return None + + +def get_instance_metadata(url, version='latest'): + md_url = combine_url(url, version) + md_url = combine_url(md_url, 'meta-data') + try: + response = util.read_file_or_url(md_url) + materializer = MetadataMaterializer(str(response), md_url) + return materializer.materialize() + except Exception: + util.logexc(LOG, "Failed fetching metadata from url %s", md_url) + return None === renamed file 'cloudinit/ec2_utils.py' => 'cloudinit/ec2_utils.py.moved' === modified file 'cloudinit/sources/DataSourceCloudStack.py' --- cloudinit/sources/DataSourceCloudStack.py 2013-01-07 17:20:58 +0000 +++ cloudinit/sources/DataSourceCloudStack.py 2013-02-20 06:55:27 +0000 @@ -25,7 +25,11 @@ import os import time +<<<<<<< TREE from cloudinit import ec2_utils as ec2 +======= +from cloudinit import ec2_utils +>>>>>>> MERGE-SOURCE from cloudinit import log as logging from cloudinit import sources from cloudinit import url_helper as uhelp @@ -104,10 +108,15 @@ if not self.wait_for_metadata_service(): return False start_time = time.time() +<<<<<<< TREE self.userdata_raw = ec2.get_instance_userdata(self.api_ver, self.metadata_address) self.metadata = ec2.get_instance_metadata(self.api_ver, self.metadata_address) +======= + self.userdata_raw = ec2_utils.get_instance_userdata(self.metadata_address, self.api_ver) + self.metadata = ec2_utils.get_instance_metadata(self.metadata_address, self.api_ver) +>>>>>>> MERGE-SOURCE LOG.debug("Crawl of metadata service took %s seconds", int(time.time() - start_time)) return True === modified file 'cloudinit/sources/DataSourceEc2.py' --- cloudinit/sources/DataSourceEc2.py 2012-11-12 17:23:44 +0000 
+++ cloudinit/sources/DataSourceEc2.py 2013-02-20 06:55:27 +0000 @@ -23,7 +23,11 @@ import os import time +<<<<<<< TREE from cloudinit import ec2_utils as ec2 +======= +from cloudinit import ec2_utils +>>>>>>> MERGE-SOURCE from cloudinit import log as logging from cloudinit import sources from cloudinit import url_helper as uhelp @@ -64,10 +68,15 @@ if not self.wait_for_metadata_service(): return False start_time = time.time() +<<<<<<< TREE self.userdata_raw = ec2.get_instance_userdata(self.api_ver, self.metadata_address) self.metadata = ec2.get_instance_metadata(self.api_ver, self.metadata_address) +======= + self.userdata_raw = ec2_utils.get_instance_userdata(self.metadata_address, self.api_ver) + self.metadata = ec2_utils.get_instance_metadata(self.metadata_address, self.api_ver) +>>>>>>> MERGE-SOURCE LOG.debug("Crawl of metadata service took %s seconds", int(time.time() - start_time)) return True @@ -136,7 +145,7 @@ start_time = time.time() url = uhelp.wait_for_url(urls=urls, max_wait=max_wait, - timeout=timeout, status_cb=LOG.warn) + timeout=timeout, status_cb=LOG.warn) if url: LOG.debug("Using metadata source: '%s'", url2base[url]) === modified file 'cloudinit/sources/DataSourceMAAS.py' --- cloudinit/sources/DataSourceMAAS.py 2012-10-23 16:58:32 +0000 +++ cloudinit/sources/DataSourceMAAS.py 2013-02-20 06:55:27 +0000 @@ -25,9 +25,11 @@ import time import urllib2 +import requests + from cloudinit import log as logging from cloudinit import sources -from cloudinit import url_helper as uhelp +from cloudinit import url_helper from cloudinit import util LOG = logging.getLogger(__name__) @@ -191,8 +193,8 @@ version=MD_VERSION): """ Read the maas datasource at seed_url. - header_cb is a method that should return a headers dictionary that will - be given to urllib2.Request() + - header_cb is a method that should return a headers dictionary for + a given url Expected format of seed_url is are the following files: * <seed_url>/<version>/meta-data/instance-id @@ -220,13 +222,13 @@ else: headers = {} try: - resp = uhelp.readurl(url, headers=headers, timeout=timeout) - if resp.ok(): + resp = util.read_file_or_url(url, headers=headers, timeout=timeout) + if resp.ok: md[name] = str(resp) else: LOG.warn(("Fetching from %s resulted in" - " an invalid http code %s"), url, resp.code) - except urllib2.HTTPError as e: + " an invalid http code %s"), url, resp.status_code) + except url_helper.UrlError as e: if e.code != 404: raise return check_seed_contents(md, seed_url) === modified file 'cloudinit/url_helper.py' --- cloudinit/url_helper.py 2012-09-24 21:13:38 +0000 +++ cloudinit/url_helper.py 2013-02-20 06:55:27 +0000 @@ -20,119 +20,162 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. 
-from contextlib import closing - -import errno -import socket import time -import urllib -import urllib2 + +import requests +from requests import exceptions + +from urlparse import (urlparse, urlunparse) from cloudinit import log as logging from cloudinit import version LOG = logging.getLogger(__name__) +# Check if requests has ssl support (added in requests >= 0.8.8) +SSL_ENABLED = False +CONFIG_ENABLED = False # This was added in 0.7 +try: + import pkg_resources + from distutils.version import LooseVersion + _REQ = pkg_resources.get_distribution('requests') + _REQ_VER = LooseVersion(_REQ.version) + if _REQ_VER >= LooseVersion('0.8.8'): + SSL_ENABLED = True + if _REQ_VER >= LooseVersion('0.7.0'): + CONFIG_ENABLED = True +except: + pass + + +def _cleanurl(url): + parsed_url = list(urlparse(url, scheme='http')) + if not parsed_url[1] and parsed_url[2]: + # Swap these since this seems to be a common + # occurrence when given urls like 'www.google.com' + parsed_url[1] = parsed_url[2] + parsed_url[2] = '' + return urlunparse(parsed_url) + class UrlResponse(object): - def __init__(self, status_code, contents=None, headers=None): - self._status_code = status_code - self._contents = contents - self._headers = headers - - @property - def code(self): - return self._status_code + def __init__(self, response): + self._response = response @property def contents(self): - return self._contents + return self._response.content + + @property + def url(self): + return self._response.url + + @property + def ok(self): + return self._response.ok @property def headers(self): - return self._headers + return self._response.headers + + @property + def code(self): + return self._response.status_code def __str__(self): - if not self.contents: - return '' - else: - return str(self.contents) - - def ok(self, redirects_ok=False): - upper = 300 - if redirects_ok: - upper = 400 - if self.code >= 200 and self.code < upper: - return True - else: - return False - - -def readurl(url, data=None, timeout=None, - retries=0, sec_between=1, headers=None): - - req_args = {} - req_args['url'] = url - if data is not None: - req_args['data'] = urllib.urlencode(data) - + return self.contents + + +class UrlError(IOError): + def __init__(self, cause): + IOError.__init__(self, str(cause)) + self.cause = cause + if isinstance(cause, exceptions.HTTPError) and cause.response: + self.code = cause.response.status_code + else: + self.code = None + + +def readurl(url, data=None, timeout=None, retries=0, sec_between=1, + headers=None, ssl_details=None, check_status=True, + allow_redirects=False): + url = _cleanurl(url) + req_args = { + 'url': url, + } + if urlparse(url).scheme == 'https' and ssl_details: + if not SSL_ENABLED: + LOG.warn("SSL is not enabled, cert. 
verification can not occur!") + else: + if 'ca_certs' in ssl_details and ssl_details['ca_certs']: + req_args['verify'] = ssl_details['ca_certs'] + else: + req_args['verify'] = True + if 'cert_file' in ssl_details and 'key_file' in ssl_details: + req_args['cert'] = [ssl_details['cert_file'], + ssl_details['key_file']] + elif 'cert_file' in ssl_details: + req_args['cert'] = str(ssl_details['cert_file']) + + req_args['allow_redirects'] = allow_redirects + req_args['method'] = 'GET' + if timeout is not None: + req_args['timeout'] = max(float(timeout), 0) + if data: + req_args['method'] = 'POST' + # It doesn't seem like config + # was added in older library versions, thus we + # need to manually do the retries if it wasn't + if CONFIG_ENABLED: + req_config = { + 'store_cookies': False, + } + # Don't use the retry support built-in + # since it doesn't allow for 'sleep_times' + # in between tries.... + # if retries: + # req_config['max_retries'] = max(int(retries), 0) + req_args['config'] = req_config + manual_tries = 1 + if retries: + manual_tries = max(int(retries) + 1, 1) if not headers: headers = { 'User-Agent': 'Cloud-Init/%s' % (version.version_string()), } - req_args['headers'] = headers - req = urllib2.Request(**req_args) - - retries = max(retries, 0) - attempts = retries + 1 - - excepts = [] - LOG.debug(("Attempting to open '%s' with %s attempts" - " (%s retries, timeout=%s) to be performed"), - url, attempts, retries, timeout) - open_args = {} - if timeout is not None: - open_args['timeout'] = int(timeout) - for i in range(0, attempts): + LOG.debug("Attempting to open '%s' with %s configuration", url, req_args) + if data: + # Do this after the log (it might be large) + req_args['data'] = data + if sec_between is None: + sec_between = -1 + excps = [] + # Handle retrying ourselves since the built-in support + # doesn't handle sleeping between tries... + for i in range(0, manual_tries): try: - with closing(urllib2.urlopen(req, **open_args)) as rh: - content = rh.read() - status = rh.getcode() - if status is None: - # This seems to happen when files are read... - status = 200 - headers = {} - if rh.headers: - headers = dict(rh.headers) - LOG.debug("Read from %s (%s, %sb) after %s attempts", - url, status, len(content), (i + 1)) - return UrlResponse(status, content, headers) - except urllib2.HTTPError as e: - excepts.append(e) - except urllib2.URLError as e: - # This can be a message string or - # another exception instance - # (socket.error for remote URLs, OSError for local URLs). 
- if (isinstance(e.reason, (OSError)) and - e.reason.errno == errno.ENOENT): - excepts.append(e.reason) - else: - excepts.append(e) - except Exception as e: - excepts.append(e) - if i + 1 < attempts: - LOG.debug("Please wait %s seconds while we wait to try again", - sec_between) - time.sleep(sec_between) - - # Didn't work out - LOG.debug("Failed reading from %s after %s attempts", url, attempts) - - # It must of errored at least once for code - # to get here so re-raise the last error - LOG.debug("%s errors occured, re-raising the last one", len(excepts)) - raise excepts[-1] + r = requests.request(**req_args) + if check_status: + r.raise_for_status() + contents = r.content + status = r.status_code + headers = r.headers + LOG.debug("Read from %s (%s, %sb) after %s attempts", url, + status, len(contents), (i + 1)) + # Doesn't seem like we can make it use a different + # subclass for responses, so add our own backward-compat + # attrs + return UrlResponse(r) + except exceptions.RequestException as e: + excps.append(UrlError(e)) + if i + 1 < manual_tries and sec_between > 0: + LOG.debug("Please wait %s seconds while we wait to try again", + sec_between) + time.sleep(sec_between) + if excps: + raise excps[-1] + return None # Should throw before this... def wait_for_url(urls, max_wait=None, timeout=None, @@ -143,7 +186,7 @@ max_wait: roughly the maximum time to wait before giving up The max time is *actually* len(urls)*timeout as each url will be tried once and given the timeout provided. - timeout: the timeout provided to urllib2.urlopen + timeout: the timeout provided to urlopen status_cb: call method with string message when a url is not available headers_cb: call method with single argument of url to get headers for request. @@ -196,7 +239,8 @@ else: headers = {} - resp = readurl(url, headers=headers, timeout=timeout) + resp = readurl(url, headers=headers, timeout=timeout, + check_status=False) if not resp.contents: reason = "empty response [%s]" % (resp.code) e = ValueError(reason) @@ -205,12 +249,8 @@ e = ValueError(reason) else: return url - except urllib2.HTTPError as e: - reason = "http error [%s]" % e.code - except urllib2.URLError as e: - reason = "url error [%s]" % e.reason - except socket.timeout as e: - reason = "socket timeout [%s]" % e + except UrlError as e: + reason = "request error [%s]" % e except Exception as e: reason = "unexpected error [%s]" % e === modified file 'cloudinit/user_data.py' --- cloudinit/user_data.py 2012-10-10 16:27:28 +0000 +++ cloudinit/user_data.py 2013-02-20 06:55:27 +0000 @@ -29,7 +29,6 @@ from cloudinit import handlers from cloudinit import log as logging -from cloudinit import url_helper from cloudinit import util LOG = logging.getLogger(__name__) @@ -173,10 +172,10 @@ if include_once_on and os.path.isfile(include_once_fn): content = util.load_file(include_once_fn) else: - resp = url_helper.readurl(include_url) - if include_once_on and resp.ok(): + resp = util.read_file_or_url(include_url) + if include_once_on and resp.ok: util.write_file(include_once_fn, str(resp), mode=0600) - if resp.ok(): + if resp.ok: content = str(resp) else: LOG.warn(("Fetching from %s resulted in" === modified file 'cloudinit/util.py' --- cloudinit/util.py 2013-01-31 00:21:37 +0000 +++ cloudinit/util.py 2013-02-20 06:55:27 +0000 @@ -51,8 +51,12 @@ from cloudinit import importer from cloudinit import log as logging from cloudinit import safeyaml +<<<<<<< TREE from cloudinit import url_helper as uhelp from cloudinit import version +======= +from cloudinit import url_helper 
+>>>>>>> MERGE-SOURCE from cloudinit.settings import (CFG_BUILTIN) @@ -70,6 +74,18 @@ CONTAINER_TESTS = ['running-in-container', 'lxc-is-container'] +class FileResponse(object): + def __init__(self, path, contents): + self.code = 200 + self.headers = {} + self.contents = contents + self.ok = True + self.url = path + + def __str__(self): + return self.contents + + class ProcessExecutionError(IOError): MESSAGE_TMPL = ('%(description)s\n' @@ -624,12 +640,53 @@ raise -def read_file_or_url(url, timeout=5, retries=10, file_retries=0): +def fetch_ssl_details(paths=None): + ssl_details = {} + # Lookup in these locations for ssl key/cert files + ssl_cert_paths = [ + '/var/lib/cloud/data/ssl', + '/var/lib/cloud/instance/data/ssl', + ] + if paths: + ssl_cert_paths.extend([ + os.path.join(paths.get_ipath_cur('data'), 'ssl'), + os.path.join(paths.get_cpath('data'), 'ssl'), + ]) + ssl_cert_paths = uniq_merge(ssl_cert_paths) + ssl_cert_paths = [d for d in ssl_cert_paths if d and os.path.isdir(d)] + cert_file = None + for d in ssl_cert_paths: + if os.path.isfile(os.path.join(d, 'cert.pem')): + cert_file = os.path.join(d, 'cert.pem') + break + key_file = None + for d in ssl_cert_paths: + if os.path.isfile(os.path.join(d, 'key.pem')): + key_file = os.path.join(d, 'key.pem') + break + if cert_file and key_file: + ssl_details['cert_file'] = cert_file + ssl_details['key_file'] = key_file + elif cert_file: + ssl_details['cert_file'] = cert_file + return ssl_details + + +def read_file_or_url(url, timeout=5, retries=10, + headers=None, data=None, sec_between=1, paths=None): if url.startswith("/"): url = "file://%s" % url - if url.startswith("file://"): - retries = file_retries - return uhelp.readurl(url, timeout=timeout, retries=retries) + if url.lower().startswith("file://"): + file_path = url[len("file://"):] + return FileResponse(file_path, contents=load_file(file_path)) + else: + return url_helper.readurl(url, + timeout=timeout, + retries=retries, + headers=headers, + data=data, + sec_between=sec_between, + ssl_details=fetch_ssl_details(paths)) def load_yaml(blob, default=None, allowed=(dict,)): @@ -671,13 +728,13 @@ md_resp = read_file_or_url(md_url, timeout, retries, file_retries) md = None - if md_resp.ok(): + if md_resp.ok: md_str = str(md_resp) md = load_yaml(md_str, default={}) ud_resp = read_file_or_url(ud_url, timeout, retries, file_retries) ud = None - if ud_resp.ok(): + if ud_resp.ok: ud_str = str(ud_resp) ud = ud_str @@ -846,8 +903,8 @@ if not url: return (None, None, None) - resp = uhelp.readurl(url) - if resp.contents.startswith(starts) and resp.ok(): + resp = read_file_or_url(url) + if resp.contents.startswith(starts) and resp.ok: return (key, url, str(resp)) return (key, url, None)
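
For reviewers who want to poke at the ssl_details handling without pulling
the whole branch, here is a small standalone sketch (not code from the
branch) of how a dict shaped like the one util.fetch_ssl_details() returns
gets translated into the keyword arguments that requests understands. The
verify/cert mapping and the 'ca_certs'/'cert_file'/'key_file' key names are
taken from the readurl() changes above; the helper name and the example
paths (which mirror the /var/lib/cloud/data/ssl lookup directory used by
fetch_ssl_details) are only illustrative.

def ssl_args_from_details(ssl_details):
    """Translate a cloud-init style ssl_details dict into requests kwargs."""
    args = {}
    if not ssl_details:
        return args
    # Use the provided CA bundle if there is one, otherwise fall back to
    # requests' default certificate verification (verify=True).
    args['verify'] = ssl_details.get('ca_certs') or True
    # Client certificate, optionally with a separate private key file.
    if 'cert_file' in ssl_details and 'key_file' in ssl_details:
        args['cert'] = (ssl_details['cert_file'], ssl_details['key_file'])
    elif 'cert_file' in ssl_details:
        args['cert'] = ssl_details['cert_file']
    return args

if __name__ == '__main__':
    details = {'cert_file': '/var/lib/cloud/data/ssl/cert.pem',
               'key_file': '/var/lib/cloud/data/ssl/key.pem'}
    print(ssl_args_from_details(details))
    # prints the verify/cert kwargs, roughly:
    #   {'verify': True, 'cert': ('/var/lib/cloud/data/ssl/cert.pem',
    #                             '/var/lib/cloud/data/ssl/key.pem')}
    # readurl() in the diff builds the equivalent kwargs inline and passes
    # them to requests.request().

As in readurl() above, nothing SSL-related is added when ssl_details is
empty, and a cert_file without a matching key_file is passed through as a
single path rather than a (cert, key) pair.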

