# Copyright 2015 Canonical Ltd. # This file is part of cloud-init. See LICENCE file for license information. # # vi: ts=4 expandtab import logging import time try: from time import monotonic as now except ImportError: # pragma: nocover from time import time as now import requests from requests import adapters from requests import exceptions from requests import structures # Arg, why does requests vendorize urllib3.... from requests.packages.urllib3 import util as urllib3_util from six.moves.urllib.parse import quote as urlquote # noqa from six.moves.urllib.parse import urlparse # noqa from six.moves.urllib.parse import urlunparse # noqa from six.moves.http_client import BAD_REQUEST as _BAD_REQUEST from six.moves.http_client import CONFLICT # noqa from six.moves.http_client import MULTIPLE_CHOICES as _MULTIPLE_CHOICES from six.moves.http_client import OK from cloudinit import version SSL_ENABLED = True try: import ssl as _ssl # noqa except ImportError: SSL_ENABLED = False LOG = logging.getLogger(__name__) def _get_base_url(url): parsed_url = list(urlparse(url, scheme='http')) parsed_url[2] = parsed_url[3] = parsed_url[4] = parsed_url[5] = '' return urlunparse(parsed_url) def _clean_url(url): parsed_url = list(urlparse(url, scheme='http')) if not parsed_url[1] and parsed_url[2]: # Swap these since this seems to be a common # occurrence when given urls like 'www.google.com' parsed_url[1] = parsed_url[2] parsed_url[2] = '' return urlunparse(parsed_url) class _Retry(urllib3_util.Retry): def is_forced_retry(self, method, status_code): # Allow >= 400 to be tried... return status_code >= _BAD_REQUEST def sleep(self): # The base class doesn't have a way to log what we are doing, # so replace it with one that does... backoff = self.get_backoff_time() if backoff <= 0: return else: LOG.debug("Please wait %s seconds while we wait to try again", backoff) time.sleep(backoff) class RequestsResponse(object): """A wrapper for requests responses (that provides common functions). This exists so that things like StringResponse or FileResponse can also exist, but with different sources of their response (aka not just from the requests library). """ def __init__(self, response): self._response = response @property def contents(self): return self._response.content @property def url(self): return self._response.url def ok(self, redirects_ok=False): upper = _MULTIPLE_CHOICES if redirects_ok: upper = _BAD_REQUEST return self.status_code >= OK and self.status_code < upper @property def headers(self): return self._response.headers @property def status_code(self): return self._response.status_code def __str__(self): return self._response.text class UrlError(IOError): def __init__(self, cause, code=None, headers=None): super(UrlError, self).__init__(str(cause)) self.cause = cause self.status_code = code self.headers = headers or {} def _get_ssl_args(url, ssl_details): ssl_args = {} scheme = urlparse(url).scheme if scheme == 'https' and ssl_details: if not SSL_ENABLED: LOG.warn("SSL is not supported, " "cert. verification can not occur!") else: if 'ca_certs' in ssl_details and ssl_details['ca_certs']: ssl_args['verify'] = ssl_details['ca_certs'] else: ssl_args['verify'] = True if 'cert_file' in ssl_details and 'key_file' in ssl_details: ssl_args['cert'] = [ssl_details['cert_file'], ssl_details['key_file']] elif 'cert_file' in ssl_details: ssl_args['cert'] = str(ssl_details['cert_file']) return ssl_args def read_url(url, data=None, timeout=None, retries=0, headers=None, ssl_details=None, check_status=True, allow_redirects=True): """Fetch a url (or post to one) with the given options. :param url: url to fetch :param data: any data to POST (this switches the request method to POST instead of GET) :param timeout: the timeout (in seconds) to wait for a response :param headers: any headers to provide (and send along) in the request :param ssl_details: a dictionary containing any ssl settings, cert_file, ca_certs and verify are valid entries (and they are only used when the url provided is https) :param check_status: checks that the response status is OK after fetching (this ensures a exception is raised on non-OK status codes) :param allow_redirects: enables redirects (or disables them) :param retries: maximum number of retries to attempt when fetching the url and the fetch fails """ url = _clean_url(url) request_args = { 'url': url, } request_args.update(_get_ssl_args(url, ssl_details)) request_args['allow_redirects'] = allow_redirects request_args['method'] = 'GET' if timeout is not None: request_args['timeout'] = max(float(timeout), 0) if data: request_args['method'] = 'POST' request_args['data'] = data if not headers: headers = structures.CaseInsensitiveDict() else: headers = structures.CaseInsensitiveDict(headers) if 'User-Agent' not in headers: headers['User-Agent'] = 'Cloud-Init/%s' % (version.version_string()) request_args['headers'] = headers session = requests.Session() if retries: retry = _Retry(total=max(int(retries), 0), raise_on_redirect=not allow_redirects) session.mount(_get_base_url(url), adapters.HTTPAdapter(max_retries=retry)) try: with session: response = session.request(**request_args) if check_status: response.raise_for_status() except exceptions.RequestException as e: if e.response is not None: raise UrlError(e, code=e.response.status_code, headers=e.response.headers) else: raise UrlError(e) else: LOG.debug("Read from %s (%s, %sb)", url, response.status_code, len(response.content)) return RequestsResponse(response) def wait_any_url(urls, max_wait=None, timeout=None, status_cb=None, sleep_time=1, exception_cb=None): """Wait for one of many urls to respond correctly. :param urls: a list of urls to try :param max_wait: roughly the maximum time to wait before giving up :param timeout: the timeout provided to ``read_url`` :param status_cb: call method with string message when a url is not available :param exception_cb: call method with 2 arguments 'msg' (per status_cb) and 'exception', the exception that occurred. :param sleep_time: how long to sleep before trying each url again The idea of this routine is to wait for the EC2 metdata service to come up. On both Eucalyptus and EC2 we have seen the case where the instance hit the MD before the MD service was up. EC2 seems to have permenantely fixed this, though. In openstack, the metadata service might be painfully slow, and unable to avoid hitting a timeout of even up to 10 seconds or more (LP: #894279) for a simple GET. Offset those needs with the need to not hang forever (and block boot) on a system where cloud-init is configured to look for EC2 Metadata service but is not going to find one. It is possible that the instance data host (169.254.169.254) may be firewalled off Entirely for a sytem, meaning that the connection will block forever unless a timeout is set. This will return a tuple of the first url which succeeded and the response object. """ start_time = now() def log_status_cb(msg, exc=None): LOG.debug(msg) if not status_cb: status_cb = log_status_cb def timeup(max_wait, start_time): current_time = now() return ((max_wait <= 0 or max_wait is None) or (current_time - start_time > max_wait)) loop_n = 0 while True: # This makes a backoff with the following graph: # # https://www.desmos.com/calculator/c8pwjy6wmt sleep_time = int(loop_n / 5) + 1 for url in urls: current_time = now() if loop_n != 0: if timeup(max_wait, start_time): break if (timeout and (current_time + timeout > (start_time + max_wait))): # shorten timeout to not run way over max_time timeout = int((start_time + max_wait) - current_time) reason = "" url_exc = None try: response = read_url(url, timeout=timeout, check_status=False) if not response.contents: reason = "empty response [%s]" % (response.code) url_exc = UrlError(ValueError(reason), code=response.code, headers=response.headers) elif not response.ok(): reason = "bad status code [%s]" % (response.code) url_exc = UrlError(ValueError(reason), code=response.code, headers=response.headers) else: return url, response except UrlError as e: reason = "request error [%s]" % e url_exc = e except Exception as e: reason = "unexpected error [%s]" % e url_exc = e current_time = now() time_taken = int(current_time - start_time) status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url, time_taken, max_wait, reason) status_cb(status_msg) if exception_cb: exception_cb(msg=status_msg, exception=url_exc) if timeup(max_wait, start_time): break loop_n = loop_n + 1 LOG.debug("Please wait %s seconds while we wait to try again", sleep_time) time.sleep(sleep_time) return None