use 'requests' rather than urllib2.

This reworks url_helper and its users to use requests rather
than urllib2.  The primary benefit is that with recent versions
of python-requests (>= 0.8.8), https certificates will now be verified.
Scott Moser 2013-03-20 08:35:24 -04:00
commit ff7b13a6e1
11 changed files with 290 additions and 141 deletions
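As a minimal sketch of the benefit (illustrative, not code from this commit; the url and CA path are made up): with python-requests >= 0.8.8, certificate verification is a matter of passing 'verify' to the request call, which is what the new readurl() plumbing below arranges via ssl_details:

import requests

# 'verify' may be True (use the default CA bundle) or a path to a CA file.
resp = requests.request(method='GET',
                        url='https://example.invalid/meta-data/',
                        verify='/var/lib/cloud/data/ssl/cert.pem')
resp.raise_for_status()  # raises requests.exceptions.HTTPError on 4xx/5xx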

View File

@ -50,6 +50,8 @@
- allow specifying apt-get command in distro config ('apt_get_command')
- support different and user-suppliable merging algorithms for cloud-config
(LP: #1023179)
- use python-requests rather than urllib2. By using recent versions of
python-requests, we get https certificate verification (LP: #1067888).
0.7.1:
- sysvinit: fix missing dependency in cloud-init job for RHEL 5.6

View File

@ -10,11 +10,6 @@ PrettyTable
# datasource is removed, this is no longer needed
oauth
# This is used to fetch the ec2 metadata into an easily
# parseable format, instead of having to have cloud-init perform
# those same fetches and decodes and signing (...) that ec2 requires.
boto
# This is only needed for places where we need to support configs in a manner
# that the built-in config parser is not sufficient (i.e.
# when we need to preserve comments, or do not have a top-level
@ -26,3 +21,9 @@ pyyaml
# The new main entrypoint uses argparse instead of optparse
argparse
# Requests handles ssl correctly!
requests
# Boto for ec2
boto

View File

@ -19,7 +19,6 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from cloudinit import templater
from cloudinit import url_helper as uhelp
from cloudinit import util
from cloudinit.settings import PER_INSTANCE
@ -112,7 +111,9 @@ def handle(name, cfg, cloud, log, args):
}
url = templater.render_string(url, url_params)
try:
uhelp.readurl(url, data=real_submit_keys, retries=tries, sec_between=3)
util.read_file_or_url(url, data=real_submit_keys,
retries=tries, sec_between=3,
ssl_details=util.fetch_ssl_details(cloud.paths))
except:
util.logexc(log, ("Failed to post phone home data to"
" %s in %s tries"), url, tries)

View File

@ -137,8 +137,8 @@ class ResolvConf(object):
self._contents.append(('option', ['search', s_list, '']))
return flat_sds
@local_domain.setter
def local_domain(self, domain):
@local_domain.setter # pl51222 pylint: disable=E1101
def local_domain(self, domain): # pl51222 pylint: disable=E0102
self.parse()
self._remove_option('domain')
self._contents.append(('option', ['domain', str(domain), '']))

View File

@ -28,6 +28,10 @@ import boto.utils as boto_utils
# would have existed) do not exist due to the blocking
# that occurred.
# TODO(harlowja): https://github.com/boto/boto/issues/1401
# When boto finally moves to using requests, we should be able
# to provide it ssl details; it does not yet, so we can't provide them...
def _unlazy_dict(mp):
if not isinstance(mp, (dict)):

View File

@ -27,7 +27,7 @@ import urllib2
from cloudinit import log as logging
from cloudinit import sources
from cloudinit import url_helper as uhelp
from cloudinit import url_helper
from cloudinit import util
LOG = logging.getLogger(__name__)
@ -80,7 +80,8 @@ class DataSourceMAAS(sources.DataSource):
self.base_url = url
(userdata, metadata) = read_maas_seed_url(self.base_url,
self.md_headers)
self._md_headers,
paths=self.paths)
self.userdata_raw = userdata
self.metadata = metadata
return True
@ -88,7 +89,7 @@ class DataSourceMAAS(sources.DataSource):
util.logexc(LOG, "Failed fetching metadata from url %s", url)
return False
def md_headers(self, url):
def _md_headers(self, url):
mcfg = self.ds_cfg
# If we are missing token_key, token_secret or consumer_key
@ -132,36 +133,37 @@ class DataSourceMAAS(sources.DataSource):
starttime = time.time()
check_url = "%s/%s/meta-data/instance-id" % (url, MD_VERSION)
urls = [check_url]
url = uhelp.wait_for_url(urls=urls, max_wait=max_wait,
timeout=timeout, exception_cb=self._except_cb,
headers_cb=self.md_headers)
url = url_helper.wait_for_url(urls=urls, max_wait=max_wait,
timeout=timeout,
exception_cb=self._except_cb,
headers_cb=self._md_headers)
if url:
LOG.debug("Using metadata source: '%s'", url)
else:
LOG.critical("Giving up on md from %s after %i seconds",
urls, int(time.time() - starttime))
urls, int(time.time() - starttime))
return bool(url)
def _except_cb(self, msg, exception):
if not (isinstance(exception, urllib2.HTTPError) and
if not (isinstance(exception, url_helper.UrlError) and
(exception.code == 403 or exception.code == 401)):
return
if 'date' not in exception.headers:
LOG.warn("date field not in %d headers" % exception.code)
LOG.warn("Missing header 'date' in %s response", exception.code)
return
date = exception.headers['date']
try:
ret_time = time.mktime(parsedate(date))
except:
LOG.warn("failed to convert datetime '%s'")
except Exception as e:
LOG.warn("Failed to convert datetime '%s': %s", date, e)
return
self.oauth_clockskew = int(ret_time - time.time())
LOG.warn("set oauth clockskew to %d" % self.oauth_clockskew)
LOG.warn("Setting oauth clockskew to %d", self.oauth_clockskew)
return
@ -189,11 +191,11 @@ def read_maas_seed_dir(seed_d):
def read_maas_seed_url(seed_url, header_cb=None, timeout=None,
version=MD_VERSION):
version=MD_VERSION, paths=None):
"""
Read the maas datasource at seed_url.
header_cb is a method that should return a headers dictionary that will
be given to urllib2.Request()
- header_cb is a method that should return a headers dictionary for
a given url
Expected format of seed_url is the following files:
* <seed_url>/<version>/meta-data/instance-id
@ -221,13 +223,17 @@ def read_maas_seed_url(seed_url, header_cb=None, timeout=None,
else:
headers = {}
try:
resp = uhelp.readurl(url, headers=headers, timeout=timeout)
ssl_details = util.fetch_ssl_details(paths)
resp = util.read_file_or_url(url,
headers=headers,
timeout=timeout,
ssl_details=ssl_details)
if resp.ok():
md[name] = str(resp)
else:
LOG.warn(("Fetching from %s resulted in"
" an invalid http code %s"), url, resp.code)
except urllib2.HTTPError as e:
except url_helper.UrlError as e:
if e.code != 404:
raise
return check_seed_contents(md, seed_url)
@ -370,7 +376,8 @@ if __name__ == "__main__":
if args.subcmd == "check-seed":
if args.url.startswith("http"):
(userdata, metadata) = read_maas_seed_url(args.url,
header_cb=my_headers, version=args.apiver)
header_cb=my_headers,
version=args.apiver)
else:
(userdata, metadata) = read_maas_seed_url(args.url)
print "=== userdata ==="

View File

@ -20,43 +20,55 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from contextlib import closing
import errno
import socket
import time
import urllib
import urllib2
import requests
from requests import exceptions
from urlparse import (urlparse, urlunparse)
from cloudinit import log as logging
from cloudinit import version
LOG = logging.getLogger(__name__)
# Check if requests has ssl support (added in requests >= 0.8.8)
SSL_ENABLED = False
CONFIG_ENABLED = False # This was added in 0.7 (but taken out in >=1.0)
try:
from distutils.version import LooseVersion
import pkg_resources
_REQ = pkg_resources.get_distribution('requests')
_REQ_VER = LooseVersion(_REQ.version) # pylint: disable=E1103
if _REQ_VER >= LooseVersion('0.8.8'):
SSL_ENABLED = True
if _REQ_VER >= LooseVersion('0.7.0') and _REQ_VER < LooseVersion('1.0.0'):
CONFIG_ENABLED = True
except:
pass
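# For example (illustrative): with requests 0.8.8 installed, both flags
# above end up True; with requests 1.1.0, SSL_ENABLED is True while
# CONFIG_ENABLED is False.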
def _cleanurl(url):
parsed_url = list(urlparse(url, scheme='http')) # pylint: disable=E1123
if not parsed_url[1] and parsed_url[2]:
# Swap these since this seems to be a common
# occurrence when given urls like 'www.google.com'
parsed_url[1] = parsed_url[2]
parsed_url[2] = ''
return urlunparse(parsed_url)
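# For example (illustrative): urlparse('www.google.com') leaves the netloc
# empty and puts the host in the path, so the swap above makes
# _cleanurl('www.google.com') return 'http://www.google.com'.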
class UrlResponse(object):
def __init__(self, status_code, contents=None, headers=None):
self._status_code = status_code
self._contents = contents
self._headers = headers
@property
def code(self):
return self._status_code
def __init__(self, response):
self._response = response
@property
def contents(self):
return self._contents
return self._response.content
@property
def headers(self):
return self._headers
def __str__(self):
if not self.contents:
return ''
else:
return str(self.contents)
def url(self):
return self._response.url
def ok(self, redirects_ok=False):
upper = 300
@ -67,72 +79,117 @@ class UrlResponse(object):
else:
return False
@property
def headers(self):
return self._response.headers
def readurl(url, data=None, timeout=None,
retries=0, sec_between=1, headers=None):
@property
def code(self):
return self._response.status_code
req_args = {}
req_args['url'] = url
if data is not None:
req_args['data'] = urllib.urlencode(data)
def __str__(self):
return self.contents
class UrlError(IOError):
def __init__(self, cause, code=None, headers=None):
IOError.__init__(self, str(cause))
self.cause = cause
self.code = code
self.headers = headers
if self.headers is None:
self.headers = {}
def readurl(url, data=None, timeout=None, retries=0, sec_between=1,
headers=None, ssl_details=None, check_status=True,
allow_redirects=True):
url = _cleanurl(url)
req_args = {
'url': url,
}
scheme = urlparse(url).scheme # pylint: disable=E1101
if scheme == 'https' and ssl_details:
if not SSL_ENABLED:
LOG.warn("SSL is not enabled, cert. verification can not occur!")
else:
if 'ca_certs' in ssl_details and ssl_details['ca_certs']:
req_args['verify'] = ssl_details['ca_certs']
else:
req_args['verify'] = True
if 'cert_file' in ssl_details and 'key_file' in ssl_details:
req_args['cert'] = [ssl_details['cert_file'],
ssl_details['key_file']]
elif 'cert_file' in ssl_details:
req_args['cert'] = str(ssl_details['cert_file'])
req_args['allow_redirects'] = allow_redirects
req_args['method'] = 'GET'
if timeout is not None:
req_args['timeout'] = max(float(timeout), 0)
if data:
req_args['method'] = 'POST'
# The 'config' kwarg does not exist in older library versions
# (nor in newer ones, where it was removed again), thus we
# need to do the retries manually if it isn't available...
if CONFIG_ENABLED:
req_config = {
'store_cookies': False,
}
# Don't use the retry support built-in
# since it doesn't allow for 'sleep_times'
# in between tries....
# if retries:
# req_config['max_retries'] = max(int(retries), 0)
req_args['config'] = req_config
manual_tries = 1
if retries:
manual_tries = max(int(retries) + 1, 1)
if not headers:
headers = {
'User-Agent': 'Cloud-Init/%s' % (version.version_string()),
}
req_args['headers'] = headers
req = urllib2.Request(**req_args)
retries = max(retries, 0)
attempts = retries + 1
excepts = []
LOG.debug(("Attempting to open '%s' with %s attempts"
" (%s retries, timeout=%s) to be performed"),
url, attempts, retries, timeout)
open_args = {}
if timeout is not None:
open_args['timeout'] = int(timeout)
for i in range(0, attempts):
LOG.debug("Attempting to open '%s' with %s configuration", url, req_args)
if data:
# Do this after the log (it might be large)
req_args['data'] = data
if sec_between is None:
sec_between = -1
excps = []
# Handle retrying ourselves since the built-in support
# doesn't handle sleeping between tries...
for i in range(0, manual_tries):
try:
with closing(urllib2.urlopen(req, **open_args)) as rh:
content = rh.read()
status = rh.getcode()
if status is None:
# This seems to happen when files are read...
status = 200
headers = {}
if rh.headers:
headers = dict(rh.headers)
LOG.debug("Read from %s (%s, %sb) after %s attempts",
url, status, len(content), (i + 1))
return UrlResponse(status, content, headers)
except urllib2.HTTPError as e:
excepts.append(e)
except urllib2.URLError as e:
# This can be a message string or
# another exception instance
# (socket.error for remote URLs, OSError for local URLs).
if (isinstance(e.reason, (OSError)) and
e.reason.errno == errno.ENOENT):
excepts.append(e.reason)
r = requests.request(**req_args)
if check_status:
r.raise_for_status() # pylint: disable=E1103
LOG.debug("Read from %s (%s, %sb) after %s attempts", url,
r.status_code, len(r.content), # pylint: disable=E1103
(i + 1))
# Doesn't seem like we can make it use a different
# subclass for responses, so add our own backward-compat
# attrs
return UrlResponse(r)
except exceptions.RequestException as e:
if (isinstance(e, (exceptions.HTTPError))
and hasattr(e, 'response') # This appeared in v 0.10.8
and e.response):
excps.append(UrlError(e, code=e.response.status_code,
headers=e.response.headers))
else:
excepts.append(e)
except Exception as e:
excepts.append(e)
if i + 1 < attempts:
LOG.debug("Please wait %s seconds while we wait to try again",
sec_between)
time.sleep(sec_between)
# Didn't work out
LOG.debug("Failed reading from %s after %s attempts", url, attempts)
# It must have errored at least once for code
# to get here, so re-raise the last error
LOG.debug("%s errors occurred, re-raising the last one", len(excepts))
raise excepts[-1]
excps.append(UrlError(e))
if SSL_ENABLED and isinstance(e, exceptions.SSLError):
# ssl exceptions are not going to get fixed by waiting a
# few seconds
break
if i + 1 < manual_tries and sec_between > 0:
LOG.debug("Please wait %s seconds while we wait to try again",
sec_between)
time.sleep(sec_between)
if excps:
raise excps[-1]
return None # Should throw before this...
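# A sketch of the new calling convention (url and CA path hypothetical);
# on failure readurl() now raises UrlError rather than urllib2.HTTPError:
#
#   resp = readurl('https://example.invalid/meta-data/',
#                  ssl_details={'ca_certs': '/var/lib/cloud/data/ssl/cert.pem'},
#                  retries=3, sec_between=2, timeout=10)
#   if resp.ok():
#       body = resp.contents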
def wait_for_url(urls, max_wait=None, timeout=None,
@ -143,7 +200,7 @@ def wait_for_url(urls, max_wait=None, timeout=None,
max_wait: roughly the maximum time to wait before giving up
The max time is *actually* len(urls)*timeout as each url will
be tried once and given the timeout provided.
timeout: the timeout provided to urllib2.urlopen
timeout: the timeout provided to urlopen
status_cb: call method with string message when a url is not available
headers_cb: call method with single argument of url to get headers
for request.
@ -190,36 +247,40 @@ def wait_for_url(urls, max_wait=None, timeout=None,
timeout = int((start_time + max_wait) - now)
reason = ""
e = None
try:
if headers_cb is not None:
headers = headers_cb(url)
else:
headers = {}
resp = readurl(url, headers=headers, timeout=timeout)
if not resp.contents:
reason = "empty response [%s]" % (resp.code)
e = ValueError(reason)
elif not resp.ok():
reason = "bad status code [%s]" % (resp.code)
e = ValueError(reason)
response = readurl(url, headers=headers, timeout=timeout,
check_status=False)
if not response.contents:
reason = "empty response [%s]" % (response.code)
e = UrlError(ValueError(reason),
code=response.code, headers=response.headers)
elif not response.ok():
reason = "bad status code [%s]" % (response.code)
e = UrlError(ValueError(reason),
code=response.code, headers=response.headers)
else:
return url
except urllib2.HTTPError as e:
reason = "http error [%s]" % e.code
except urllib2.URLError as e:
reason = "url error [%s]" % e.reason
except socket.timeout as e:
reason = "socket timeout [%s]" % e
except UrlError as e:
reason = "request error [%s]" % e
except Exception as e:
reason = "unexpected error [%s]" % e
time_taken = int(time.time() - start_time)
status_msg = "Calling '%s' failed [%s/%ss]: %s" % (url,
time_taken,
max_wait, reason)
time_taken,
max_wait,
reason)
status_cb(status_msg)
if exception_cb:
# This can be used to alter the headers that will be sent
# in the future, for example this is what the MAAS datasource
# does.
exception_cb(msg=status_msg, exception=e)
if timeup(max_wait, start_time):

View File

@ -29,7 +29,6 @@ from email.mime.text import MIMEText
from cloudinit import handlers
from cloudinit import log as logging
from cloudinit import url_helper
from cloudinit import util
LOG = logging.getLogger(__name__)
@ -60,6 +59,7 @@ EXAMINE_FOR_LAUNCH_INDEX = ["text/cloud-config"]
class UserDataProcessor(object):
def __init__(self, paths):
self.paths = paths
self.ssl_details = util.fetch_ssl_details(paths)
def process(self, blob):
accumulating_msg = MIMEMultipart()
@ -173,7 +173,8 @@ class UserDataProcessor(object):
if include_once_on and os.path.isfile(include_once_fn):
content = util.load_file(include_once_fn)
else:
resp = url_helper.readurl(include_url)
resp = util.read_file_or_url(include_url,
ssl_details=self.ssl_details)
if include_once_on and resp.ok():
util.write_file(include_once_fn, str(resp), mode=0600)
if resp.ok():

View File

@ -52,7 +52,7 @@ from cloudinit import log as logging
from cloudinit import mergers
from cloudinit import safeyaml
from cloudinit import type_utils
from cloudinit import url_helper as uhelp
from cloudinit import url_helper
from cloudinit import version
from cloudinit.settings import (CFG_BUILTIN)
@ -71,6 +71,31 @@ FN_ALLOWED = ('_-.()' + string.digits + string.ascii_letters)
CONTAINER_TESTS = ['running-in-container', 'lxc-is-container']
# Made to have the same accessors as UrlResponse so that
# read_file_or_url can return either object and the
# 'user' of those objects will not need to know the difference.
class StringResponse(object):
def __init__(self, contents, code=200):
self.code = code
self.headers = {}
self.contents = contents
self.url = None
def ok(self, *args, **kwargs): # pylint: disable=W0613
if self.code != 200:
return False
return True
def __str__(self):
return self.contents
class FileResponse(StringResponse):
def __init__(self, path, contents, code=200):
StringResponse.__init__(self, contents, code=code)
self.url = path
class ProcessExecutionError(IOError):
MESSAGE_TMPL = ('%(description)s\n'
@ -606,18 +631,62 @@ def read_optional_seed(fill, base="", ext="", timeout=5):
fill['user-data'] = ud
fill['meta-data'] = md
return True
except OSError as e:
except IOError as e:
if e.errno == errno.ENOENT:
return False
raise
def read_file_or_url(url, timeout=5, retries=10, file_retries=0):
def fetch_ssl_details(paths=None):
ssl_details = {}
# Lookup in these locations for ssl key/cert files
ssl_cert_paths = [
'/var/lib/cloud/data/ssl',
'/var/lib/cloud/instance/data/ssl',
]
if paths:
ssl_cert_paths.extend([
os.path.join(paths.get_ipath_cur('data'), 'ssl'),
os.path.join(paths.get_cpath('data'), 'ssl'),
])
ssl_cert_paths = uniq_merge(ssl_cert_paths)
ssl_cert_paths = [d for d in ssl_cert_paths if d and os.path.isdir(d)]
cert_file = None
for d in ssl_cert_paths:
if os.path.isfile(os.path.join(d, 'cert.pem')):
cert_file = os.path.join(d, 'cert.pem')
break
key_file = None
for d in ssl_cert_paths:
if os.path.isfile(os.path.join(d, 'key.pem')):
key_file = os.path.join(d, 'key.pem')
break
if cert_file and key_file:
ssl_details['cert_file'] = cert_file
ssl_details['key_file'] = key_file
elif cert_file:
ssl_details['cert_file'] = cert_file
return ssl_details
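# Illustrative result when both files exist under one of the search dirs:
#   fetch_ssl_details() -> {'cert_file': '/var/lib/cloud/data/ssl/cert.pem',
#                           'key_file': '/var/lib/cloud/data/ssl/key.pem'}
# readurl() then maps these onto the requests 'cert' argument.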
def read_file_or_url(url, timeout=5, retries=10,
headers=None, data=None, sec_between=1, ssl_details=None):
url = url.lstrip()
if url.startswith("/"):
url = "file://%s" % url
if url.startswith("file://"):
retries = file_retries
return uhelp.readurl(url, timeout=timeout, retries=retries)
if url.lower().startswith("file://"):
if data:
LOG.warn("Unable to post data to file resource %s", url)
file_path = url[len("file://"):]
return FileResponse(file_path, contents=load_file(file_path))
else:
return url_helper.readurl(url,
timeout=timeout,
retries=retries,
headers=headers,
data=data,
sec_between=sec_between,
ssl_details=ssl_details)
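# Illustrative usage (paths/urls hypothetical); both branches return objects
# with the same contents/ok()/code accessors:
#   read_file_or_url('/etc/hostname')                      -> FileResponse
#   read_file_or_url('http://example.invalid/seed',
#                    ssl_details=fetch_ssl_details(paths)) -> UrlResponse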
def load_yaml(blob, default=None, allowed=(dict,)):
@ -834,7 +903,7 @@ def get_cmdline_url(names=('cloud-config-url', 'url'),
if not url:
return (None, None, None)
resp = uhelp.readurl(url)
resp = read_file_or_url(url)
if resp.contents.startswith(starts) and resp.ok():
return (key, url, str(resp))

View File

@ -195,8 +195,8 @@ class TestCmdlineUrl(MockerTestCase):
mock_readurl = self.mocker.replace(url_helper.readurl,
passthrough=False)
mock_readurl(url)
self.mocker.result(url_helper.UrlResponse(200, payload))
mock_readurl(url, ARGS, KWARGS)
self.mocker.result(util.StringResponse(payload))
self.mocker.replay()
self.assertEqual((key, url, None),
@ -211,8 +211,8 @@ class TestCmdlineUrl(MockerTestCase):
mock_readurl = self.mocker.replace(url_helper.readurl,
passthrough=False)
mock_readurl(url)
self.mocker.result(url_helper.UrlResponse(200, payload))
mock_readurl(url, ARGS, KWARGS)
self.mocker.result(util.StringResponse(payload))
self.mocker.replay()
self.assertEqual((key, url, payload),
@ -225,7 +225,7 @@ class TestCmdlineUrl(MockerTestCase):
cmdline = "ro %s=%s bar=1" % (key, url)
self.mocker.replace(url_helper.readurl, passthrough=False)
self.mocker.result(url_helper.UrlResponse(400))
self.mocker.result(util.StringResponse(""))
self.mocker.replay()
self.assertEqual((None, None, None),

View File

@ -3,12 +3,13 @@ import os
from cloudinit.sources import DataSourceMAAS
from cloudinit import url_helper
from cloudinit import util
from tests.unittests.helpers import populate_dir
from mocker import MockerTestCase
import mocker
class TestMAASDataSource(MockerTestCase):
class TestMAASDataSource(mocker.MockerTestCase):
def setUp(self):
super(TestMAASDataSource, self).setUp()
@ -115,9 +116,11 @@ class TestMAASDataSource(MockerTestCase):
for key in valid_order:
url = "%s/%s/%s" % (my_seed, my_ver, key)
mock_request(url, headers=my_headers, timeout=None)
mock_request(url, headers=my_headers, timeout=mocker.ANY,
data=mocker.ANY, sec_between=mocker.ANY,
ssl_details=mocker.ANY, retries=mocker.ANY)
resp = valid.get(key)
self.mocker.result(url_helper.UrlResponse(200, resp))
self.mocker.result(util.StringResponse(resp))
self.mocker.replay()
(userdata, metadata) = DataSourceMAAS.read_maas_seed_url(my_seed,