From 6cb35e9b0f0d3077b91b43c375c9fdcbca0a0c91 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Tue, 20 Nov 2012 20:02:48 -0800 Subject: [PATCH 01/16] Start allowing different merging types to be applied After user data handling splits apart all the different content types into their various mime messages, it is nice to be able to have each message specify how it should be merged (mainly for cloud-config or cloud-archive) into the single cloud config that is eventually used. This starts to add a pluggable merging framework and the needed components to activate said headers and merging. --- cloudinit/handlers/__init__.py | 49 ++++++++------ cloudinit/handlers/boot_hook.py | 2 +- cloudinit/handlers/cloud_config.py | 22 +++--- cloudinit/handlers/shell_script.py | 2 +- cloudinit/handlers/upstart_job.py | 2 +- cloudinit/mergers/__init__.py | 104 +++++++++++++++++++++++++++++ cloudinit/mergers/dict.py | 33 +++++++++ cloudinit/mergers/list.py | 41 ++++++++++++ cloudinit/mergers/str.py | 28 ++++++++ 9 files changed, 246 insertions(+), 37 deletions(-) create mode 100644 cloudinit/mergers/__init__.py create mode 100644 cloudinit/mergers/dict.py create mode 100644 cloudinit/mergers/list.py create mode 100644 cloudinit/mergers/str.py diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 8d6dcd4d..bfccfd89 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -69,7 +69,6 @@ INCLUSION_SRCH = sorted(list(INCLUSION_TYPES_MAP.keys()), class Handler(object): - __metaclass__ = abc.ABCMeta def __init__(self, frequency, version=2): @@ -83,15 +82,12 @@ class Handler(object): def list_types(self): raise NotImplementedError() - def handle_part(self, data, ctype, filename, payload, frequency): - return self._handle_part(data, ctype, filename, payload, frequency) - @abc.abstractmethod - def _handle_part(self, data, ctype, filename, payload, frequency): + def handle_part(self, *args, **kwargs): raise NotImplementedError() -def run_part(mod, data, ctype, filename, payload, frequency): +def run_part(mod, data, filename, payload, headers, frequency): mod_freq = mod.frequency if not (mod_freq == PER_ALWAYS or (frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)): return @@ -102,19 +98,25 @@ def run_part(mod, data, ctype, filename, payload, frequency): mod_ver = int(mod_ver) except: mod_ver = 1 + content_type = headers['Content-Type'] try: LOG.debug("Calling handler %s (%s, %s, %s) with frequency %s", - mod, ctype, filename, mod_ver, frequency) - if mod_ver >= 2: + mod, content_type, filename, mod_ver, frequency) + if mod_ver == 3: + # Treat as v. 3 which does get a frequency + headers + mod.handle_part(data, content_type, filename, + payload, frequency, headers) + elif mod_ver == 2: # Treat as v. 2 which does get a frequency - mod.handle_part(data, ctype, filename, payload, frequency) + mod.handle_part(data, content_type, filename, + payload, frequency) else: # Treat as v. 
1 which gets no frequency - mod.handle_part(data, ctype, filename, payload) + mod.handle_part(data, content_type, filename, payload) except: util.logexc(LOG, ("Failed calling handler %s (%s, %s, %s)" " with frequency %s"), - mod, ctype, filename, + mod, content_type, filename, mod_ver, frequency) @@ -173,26 +175,27 @@ def _escape_string(text): return text -def walker_callback(pdata, ctype, filename, payload): - if ctype in PART_CONTENT_TYPES: - walker_handle_handler(pdata, ctype, filename, payload) +def walker_callback(data, filename, payload, headers): + content_type = headers['Content-Type'] + if content_type in PART_CONTENT_TYPES: + walker_handle_handler(data, content_type, filename, payload) return - handlers = pdata['handlers'] - if ctype in pdata['handlers']: - run_part(handlers[ctype], pdata['data'], ctype, filename, - payload, pdata['frequency']) + handlers = data['handlers'] + if content_type in handlers: + run_part(handlers[content_type], data['data'], filename, + payload, headers, data['frequency']) elif payload: # Extract the first line or 24 bytes for displaying in the log start = _extract_first_or_bytes(payload, 24) details = "'%s...'" % (_escape_string(start)) if ctype == NOT_MULTIPART_TYPE: LOG.warning("Unhandled non-multipart (%s) userdata: %s", - ctype, details) + content_type, details) else: LOG.warning("Unhandled unknown content-type (%s) userdata: %s", - ctype, details) + content_type, details) else: - LOG.debug("empty payload of type %s" % ctype) + LOG.debug("Empty payload of type %s", content_type) # Callback is a function that will be called with @@ -212,7 +215,9 @@ def walk(msg, callback, data): if not filename: filename = PART_FN_TPL % (partnum) - callback(data, ctype, filename, part.get_payload(decode=True)) + callback(data, ctype, filename, + part.get_payload(decode=True), + dict(part)) partnum = partnum + 1 diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py index 456b8020..bf313f10 100644 --- a/cloudinit/handlers/boot_hook.py +++ b/cloudinit/handlers/boot_hook.py @@ -56,7 +56,7 @@ class BootHookPartHandler(handlers.Handler): util.write_file(filepath, contents, 0700) return filepath - def _handle_part(self, _data, ctype, filename, payload, _frequency): + def handle_part(self, _data, ctype, filename, payload, _frequency): if ctype in handlers.CONTENT_SIGNALS: return diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index f6d95244..86027187 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -22,6 +22,7 @@ from cloudinit import handlers from cloudinit import log as logging +from cloudinit import mergers from cloudinit import util from cloudinit.settings import (PER_ALWAYS) @@ -31,8 +32,8 @@ LOG = logging.getLogger(__name__) class CloudConfigPartHandler(handlers.Handler): def __init__(self, paths, **_kwargs): - handlers.Handler.__init__(self, PER_ALWAYS) - self.cloud_buf = [] + handlers.Handler.__init__(self, PER_ALWAYS, version=3) + self.cloud_buf = {} self.cloud_fn = paths.get_ipath("cloud_config") def list_types(self): @@ -43,20 +44,17 @@ class CloudConfigPartHandler(handlers.Handler): def _write_cloud_config(self, buf): if not self.cloud_fn: return - lines = [str(b) for b in buf] - payload = "\n".join(lines) + payload = util.yaml_dumps(self.cloud_buf) util.write_file(self.cloud_fn, payload, 0600) - def _handle_part(self, _data, ctype, filename, payload, _frequency): + def handle_part(self, _data, ctype, filename, payload, _frequency, headers): if ctype == 
handlers.CONTENT_START: - self.cloud_buf = [] + self.cloud_buf = {} return if ctype == handlers.CONTENT_END: self._write_cloud_config(self.cloud_buf) - self.cloud_buf = [] + self.cloud_buf = {} return - - filename = util.clean_filename(filename) - if not filename: - filename = '??' - self.cloud_buf.extend(["#%s" % (filename), str(payload)]) + merge_how = headers.get("Merge-Type", 'list+dict+str') + merger = mergers.construct(merge_how) + self.cloud_buf = merger.merge(self.cloud_buf, util.load_yaml(payload)) diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py index 6c5c11ca..2a87e8dd 100644 --- a/cloudinit/handlers/shell_script.py +++ b/cloudinit/handlers/shell_script.py @@ -41,7 +41,7 @@ class ShellScriptPartHandler(handlers.Handler): handlers.type_from_starts_with("#!"), ] - def _handle_part(self, _data, ctype, filename, payload, _frequency): + def handle_part(self, _data, ctype, filename, payload, _frequency): if ctype in handlers.CONTENT_SIGNALS: # TODO(harlowja): maybe delete existing things here return diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py index 99e0afde..a5cb9b0c 100644 --- a/cloudinit/handlers/upstart_job.py +++ b/cloudinit/handlers/upstart_job.py @@ -42,7 +42,7 @@ class UpstartJobPartHandler(handlers.Handler): handlers.type_from_starts_with("#upstart-job"), ] - def _handle_part(self, _data, ctype, filename, payload, frequency): + def handle_part(self, _data, ctype, filename, payload, frequency): if ctype in handlers.CONTENT_SIGNALS: return diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py new file mode 100644 index 00000000..b3e728b0 --- /dev/null +++ b/cloudinit/mergers/__init__.py @@ -0,0 +1,104 @@ +# vi: ts=4 expandtab +# +# Copyright (C) 2012 Yahoo! Inc. +# +# Author: Joshua Harlow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 3, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ + +from cloudinit import importer +from cloudinit import log as logging +from cloudinit import util + +LOG = logging.getLogger(__name__) + + +class UnknownMerger(object): + # Named differently so auto-method finding + # doesn't pick this up if there is ever a type + # named "unknown" + def _handle_unknown(self, meth_wanted, value, merge_with): + return value + + def merge(self, source, merge_with): + type_name = util.obj_name(source) + type_name = type_name.lower() + method_name = "_on_%s" % (type_name) + meth = None + args = [source, merge_with] + if hasattr(self, method_name): + meth = getattr(self, method_name) + if not meth: + meth = self._handle_unknown + args.insert(0, method_name) + return meth(*args) + + +class LookupMerger(UnknownMerger): + def __init__(self, lookups=None): + UnknownMerger.__init__(self) + if lookups is None: + self._lookups = [] + else: + self._lookups = lookups + + def _handle_unknown(self, meth_wanted, value, merge_with): + meth = None + for merger in self._lookups: + if hasattr(merger, meth_wanted): + # First one that has that method/attr gets to be + # the one that will be called + meth = getattr(merger, meth_wanted) + break + if not meth: + return UnknownMerger._handle_unknown(self, meth_wanted, + value, merge_with) + return meth(value, merge_with) + + +def _extract_merger_names(merge_how): + names = [] + for m_name in merge_how.split("+"): + # Canonicalize the name (so that it can be found + # even when users alter it in various ways... + m_name = m_name.lower().strip() + m_name = m_name.replace(" ", "_") + m_name = m_name.replace("\t", "_") + m_name = m_name.replace("-", "_") + if not m_name: + continue + names.append(m_name) + return names + + +def construct(merge_how, default_classes=None): + mergers = [] + merger_classes = [] + root = LookupMerger(mergers) + for m_name in _extract_merger_names(merge_how): + merger_locs = importer.find_module(m_name, + [__name__], + ['Merger']) + if not merger_locs: + msg = "Could not find merger named %s" % (m_name) + raise ImportError(msg) + else: + mod = importer.import_module(merger_locs[0]) + cls = getattr(mod, 'Merger') + merger_classes.append(cls) + if not merger_classes and default_classes: + merger_classes = default_classes + for m_class in merger_classes: + mergers.append(m_class(root)) + return root \ No newline at end of file diff --git a/cloudinit/mergers/dict.py b/cloudinit/mergers/dict.py new file mode 100644 index 00000000..a0ffaa33 --- /dev/null +++ b/cloudinit/mergers/dict.py @@ -0,0 +1,33 @@ +# vi: ts=4 expandtab +# +# Copyright (C) 2012 Yahoo! Inc. +# +# Author: Joshua Harlow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 3, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ + +class Merger(object): + def __init__(self, merger): + self._merger = merger + + def _on_dict(self, value, merge_with): + if not isinstance(merge_with, (dict)): + return value + merged = dict(value) + for (k, v) in merge_with.items(): + if k in merged: + merged[k] = self._merger.merge(merged[k], v) + else: + merged[k] = v + return merged diff --git a/cloudinit/mergers/list.py b/cloudinit/mergers/list.py new file mode 100644 index 00000000..ad1b9793 --- /dev/null +++ b/cloudinit/mergers/list.py @@ -0,0 +1,41 @@ +# vi: ts=4 expandtab +# +# Copyright (C) 2012 Yahoo! Inc. +# +# Author: Joshua Harlow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 3, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +class Merger(object): + def __init__(self, merger): + self._merger = merger + + def _on_tuple(self, value, merge_with): + return self._on_list(list(value), merge_with) + + def _on_list(self, value, merge_with): + if isinstance(merge_with, (tuple, list)): + new_value = list(value) + for m_v in merge_with: + m_am = 0 + for (i, o_v) in enumerate(new_value): + if m_v == o_v: + new_value[i] = self._merger.merge(o_v, m_v) + m_am += 1 + if m_am == 0: + new_value.append(m_v) + else: + new_value = list(value) + new_value.append(merge_with) + return new_value diff --git a/cloudinit/mergers/str.py b/cloudinit/mergers/str.py new file mode 100644 index 00000000..7c3fa585 --- /dev/null +++ b/cloudinit/mergers/str.py @@ -0,0 +1,28 @@ +# vi: ts=4 expandtab +# +# Copyright (C) 2012 Yahoo! Inc. +# +# Author: Joshua Harlow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 3, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + + +class Merger(object): + def __init__(self, merger): + pass + + def _on_unicode(self, value, merge_with): + return self._on_str(value, merge_with) + + def _on_str(self, value, merge_with): + return value From 957b8f79611da42a35df4d2643a407f05c294f1b Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 22 Nov 2012 08:21:37 -0800 Subject: [PATCH 02/16] Continue working on merging prototype. 
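As a rough illustration of what the prototype is driving at (not part of this patch; the header value and payload below are examples only): each user-data part can now carry a Merge-Type header describing how its cloud-config should be folded into the combined configuration. Built with the stdlib email module, the way the later unit tests do, such a part would look roughly like:

    from email.mime.base import MIMEBase

    # A cloud-config part that asks for a specific merge behaviour via the
    # Merge-Type MIME header; at this point in the series the specification
    # is still the plain '+'-joined form (per-merger options come later).
    part = MIMEBase("text", "cloud-config")
    part['Merge-Type'] = 'list+dict+str'
    part.set_payload("#cloud-config\nrun:\n - echo hi\n")
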
--- cloudinit/handlers/__init__.py | 26 +++++++++++++++++--------- cloudinit/handlers/cloud_config.py | 27 +++++++++++++++++++++------ 2 files changed, 38 insertions(+), 15 deletions(-) diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index bfccfd89..566b61a7 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -92,14 +92,14 @@ def run_part(mod, data, filename, payload, headers, frequency): if not (mod_freq == PER_ALWAYS or (frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)): return - mod_ver = mod.handler_version # Sanity checks on version (should be an int convertable) try: + mod_ver = mod.handler_version mod_ver = int(mod_ver) - except: + except (TypeError, ValueError, AttributeError): mod_ver = 1 - content_type = headers['Content-Type'] try: + content_type = headers['Content-Type'] LOG.debug("Calling handler %s (%s, %s, %s) with frequency %s", mod, content_type, filename, mod_ver, frequency) if mod_ver == 3: @@ -110,9 +110,11 @@ def run_part(mod, data, filename, payload, headers, frequency): # Treat as v. 2 which does get a frequency mod.handle_part(data, content_type, filename, payload, frequency) - else: + elif mod_ver == 1: # Treat as v. 1 which gets no frequency mod.handle_part(data, content_type, filename, payload) + else: + raise ValueError("Unknown module version %s" % (mod_ver)) except: util.logexc(LOG, ("Failed calling handler %s (%s, %s, %s)" " with frequency %s"), @@ -121,11 +123,17 @@ def run_part(mod, data, filename, payload, headers, frequency): def call_begin(mod, data, frequency): - run_part(mod, data, CONTENT_START, None, None, frequency) + headers = { + 'Content-Type': CONTENT_START, + } + run_part(mod, data, None, None, headers, frequency) def call_end(mod, data, frequency): - run_part(mod, data, CONTENT_END, None, None, frequency) + headers = { + 'Content-Type': CONTENT_END, + } + run_part(mod, data, None, None, headers, frequency) def walker_handle_handler(pdata, _ctype, _filename, payload): @@ -215,9 +223,9 @@ def walk(msg, callback, data): if not filename: filename = PART_FN_TPL % (partnum) - callback(data, ctype, filename, - part.get_payload(decode=True), - dict(part)) + headers = dict(part) + headers['Content-Type'] = ctype + callback(data, filename, part.get_payload(decode=True), headers) partnum = partnum + 1 diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 86027187..22ced20d 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -29,6 +29,8 @@ from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) +DEF_MERGE_TYPE = "list+dict+str" + class CloudConfigPartHandler(handlers.Handler): def __init__(self, paths, **_kwargs): @@ -44,10 +46,25 @@ class CloudConfigPartHandler(handlers.Handler): def _write_cloud_config(self, buf): if not self.cloud_fn: return - payload = util.yaml_dumps(self.cloud_buf) - util.write_file(self.cloud_fn, payload, 0600) + lines = ["#cloud-config", util.yaml_dumps(self.cloud_buf)] + util.write_file(self.cloud_fn, "\n".join(lines), 0600) - def handle_part(self, _data, ctype, filename, payload, _frequency, headers): + def _merge_part(self, payload, headers, filename): + merge_how = headers.get("Merge-Type") + try: + payload_y = util.load_yaml(payload) + if not merge_how: + merge_how = payload_y.pop("Merge-Type", '') + merge_how = merge_how.strip().lower() + if not merge_how: + merge_how = DEF_MERGE_TYPE + merger = mergers.construct(merge_how) + self.cloud_buf = 
merger.merge(self.cloud_buf, payload_y) + except: + util.logexc(LOG, "Failed at merging in cloud config part from %s", + filename) + + def handle_part(self, _data, ctype, filename, payload, _freq, headers): if ctype == handlers.CONTENT_START: self.cloud_buf = {} return @@ -55,6 +72,4 @@ class CloudConfigPartHandler(handlers.Handler): self._write_cloud_config(self.cloud_buf) self.cloud_buf = {} return - merge_how = headers.get("Merge-Type", 'list+dict+str') - merger = mergers.construct(merge_how) - self.cloud_buf = merger.merge(self.cloud_buf, util.load_yaml(payload)) + self._merge_part(payload, headers, filename) From 1198b26a6bf0d620e410b3b9a51fe2a1b1bc1e34 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 22 Nov 2012 08:24:25 -0800 Subject: [PATCH 03/16] Select merge-type from either header or content after loading as yaml. --- cloudinit/handlers/cloud_config.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 22ced20d..ba07b2ef 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -50,11 +50,13 @@ class CloudConfigPartHandler(handlers.Handler): util.write_file(self.cloud_fn, "\n".join(lines), 0600) def _merge_part(self, payload, headers, filename): - merge_how = headers.get("Merge-Type") + merge_headers_how = headers.get("Merge-Type") try: payload_y = util.load_yaml(payload) - if not merge_how: - merge_how = payload_y.pop("Merge-Type", '') + merge_how = '' + for merge_i in [payload_y.pop("Merge-Type", ''), merge_headers_how]: + if merge_i: + merge_how = merge_i merge_how = merge_how.strip().lower() if not merge_how: merge_how = DEF_MERGE_TYPE From 2b6240b4e0287f5d7b06aff6295caa7c9c810c15 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 22 Nov 2012 08:26:20 -0800 Subject: [PATCH 04/16] Continue work. --- cloudinit/handlers/cloud_config.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index ba07b2ef..50fbb445 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -50,13 +50,14 @@ class CloudConfigPartHandler(handlers.Handler): util.write_file(self.cloud_fn, "\n".join(lines), 0600) def _merge_part(self, payload, headers, filename): - merge_headers_how = headers.get("Merge-Type") + merge_headers = headers.get("Merge-Type") try: payload_y = util.load_yaml(payload) merge_how = '' - for merge_i in [payload_y.pop("Merge-Type", ''), merge_headers_how]: + for merge_i in [payload_y.pop("Merge-Type", ''), merge_headers]: if merge_i: merge_how = merge_i + break merge_how = merge_how.strip().lower() if not merge_how: merge_how = DEF_MERGE_TYPE From ec9ce5d6393344b842e82a71c8b751e80532516c Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 22 Nov 2012 08:49:48 -0800 Subject: [PATCH 05/16] Change the yaml merge header extraction to be in a sep. function that can look in more places. 
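For illustration (the blob below is invented): with this change the merge specification no longer has to ride in the MIME part headers, it can also be embedded in the cloud-config body itself under a "Merge-Type", "merge-type" or "merge_type" key, where the new _merge_header_extract() helper will find it, e.g.:

    #cloud-config
    merge_type: 'dict+list+str'
    run:
     - echo hi
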
--- cloudinit/handlers/cloud_config.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 50fbb445..de9c5252 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -30,6 +30,7 @@ from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) DEF_MERGE_TYPE = "list+dict+str" +MERGE_HEADER = 'Merge-Type' class CloudConfigPartHandler(handlers.Handler): @@ -46,19 +47,33 @@ class CloudConfigPartHandler(handlers.Handler): def _write_cloud_config(self, buf): if not self.cloud_fn: return + # Write the combined & merged dictionary/yaml out lines = ["#cloud-config", util.yaml_dumps(self.cloud_buf)] util.write_file(self.cloud_fn, "\n".join(lines), 0600) + def _merge_header_extract(self, payload_yaml): + merge_header_yaml = '' + for k in [MERGE_HEADER, MERGE_HEADER.lower(), + MERGE_HEADER.lower().replace("-", "_")]: + if k in payload_yaml: + merge_header_yaml = str(payload_yaml[k]) + break + return merge_header_yaml + def _merge_part(self, payload, headers, filename): - merge_headers = headers.get("Merge-Type") + merge_header_headers = headers.get(MERGE_HEADER, '') try: payload_y = util.load_yaml(payload) merge_how = '' - for merge_i in [payload_y.pop("Merge-Type", ''), merge_headers]: + # Select either the merge-type from the content + # or the merge type from the headers or default to our own set + # if neither exists (or is empty) from the later + merge_header_yaml = self._merge_header_extract(payload_y) + for merge_i in [merge_header_yaml, merge_header_headers]: + merge_i = merge_i.strip().lower() if merge_i: merge_how = merge_i break - merge_how = merge_how.strip().lower() if not merge_how: merge_how = DEF_MERGE_TYPE merger = mergers.construct(merge_how) From 96e551f6902482f81010f941204ca781dfcc9680 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 22 Nov 2012 08:52:35 -0800 Subject: [PATCH 06/16] Adjust naming and exception catching. 
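To make the selection order concrete (the candidate values below are made up): a merge type found inside the payload yaml is preferred over one supplied in the part headers, and the module default is only used when neither is present. Roughly:

    # Sketch of the precedence applied in _merge_part(); not part of the diff.
    candidates = ["dict+list+str",   # extracted from the payload yaml
                  "list+dict+str"]   # taken from the MIME part headers
    merge_how = ''
    for merge_i in candidates:
        merge_i = merge_i.strip().lower()
        if merge_i:
            merge_how = merge_i
            break
    if not merge_how:
        merge_how = DEF_MERGE_TYPE   # "list+dict+str"
    # merge_how is now "dict+list+str"
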
--- cloudinit/handlers/cloud_config.py | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index de9c5252..1cfbc210 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -60,27 +60,23 @@ class CloudConfigPartHandler(handlers.Handler): break return merge_header_yaml - def _merge_part(self, payload, headers, filename): + def _merge_part(self, payload, headers): merge_header_headers = headers.get(MERGE_HEADER, '') - try: - payload_y = util.load_yaml(payload) - merge_how = '' - # Select either the merge-type from the content - # or the merge type from the headers or default to our own set - # if neither exists (or is empty) from the later - merge_header_yaml = self._merge_header_extract(payload_y) - for merge_i in [merge_header_yaml, merge_header_headers]: - merge_i = merge_i.strip().lower() - if merge_i: - merge_how = merge_i - break - if not merge_how: - merge_how = DEF_MERGE_TYPE - merger = mergers.construct(merge_how) - self.cloud_buf = merger.merge(self.cloud_buf, payload_y) - except: - util.logexc(LOG, "Failed at merging in cloud config part from %s", - filename) + payload_yaml = util.load_yaml(payload) + merge_how = '' + # Select either the merge-type from the content + # or the merge type from the headers or default to our own set + # if neither exists (or is empty) from the later + merge_header_yaml = self._merge_header_extract(payload_yaml) + for merge_i in [merge_header_yaml, merge_header_headers]: + merge_i = merge_i.strip().lower() + if merge_i: + merge_how = merge_i + break + if not merge_how: + merge_how = DEF_MERGE_TYPE + merger = mergers.construct(merge_how) + self.cloud_buf = merger.merge(self.cloud_buf, payload_yaml) def handle_part(self, _data, ctype, filename, payload, _freq, headers): if ctype == handlers.CONTENT_START: @@ -90,4 +86,8 @@ class CloudConfigPartHandler(handlers.Handler): self._write_cloud_config(self.cloud_buf) self.cloud_buf = {} return - self._merge_part(payload, headers, filename) + try: + self._merge_part(payload, headers) + except: + util.logexc(LOG, "Failed at merging in cloud config part from %s", + filename) From 32473d864e9116670d2b60e6688eebc11d253f8e Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 22 Nov 2012 09:00:14 -0800 Subject: [PATCH 07/16] Add which files the yaml blob came from. 
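With this change the merged cloud-config that gets written out records which parts it was built from; the resulting file would look roughly like the following (part names and keys are only illustrative):

    #cloud-config

    # from 2 files
    # part-001
    # part-002

    a: b
    run:
    - echo hi
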
--- cloudinit/handlers/cloud_config.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 1cfbc210..9a8782bb 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -38,6 +38,7 @@ class CloudConfigPartHandler(handlers.Handler): handlers.Handler.__init__(self, PER_ALWAYS, version=3) self.cloud_buf = {} self.cloud_fn = paths.get_ipath("cloud_config") + self.file_names = [] def list_types(self): return [ @@ -48,7 +49,17 @@ class CloudConfigPartHandler(handlers.Handler): if not self.cloud_fn: return # Write the combined & merged dictionary/yaml out - lines = ["#cloud-config", util.yaml_dumps(self.cloud_buf)] + lines = [ + "#cloud-config", + '', + ] + # Write which files we merged from + if self.file_names: + lines.append("# from %s files" % (len(self.file_names))) + for fn in self.file_names: + lines.append("# %s" % (fn)) + lines.append("") + lines.append(util.yaml_dumps(self.cloud_buf)) util.write_file(self.cloud_fn, "\n".join(lines), 0600) def _merge_header_extract(self, payload_yaml): @@ -78,16 +89,21 @@ class CloudConfigPartHandler(handlers.Handler): merger = mergers.construct(merge_how) self.cloud_buf = merger.merge(self.cloud_buf, payload_yaml) + def _reset(self): + self.file_names = [] + self.cloud_buf = {} + def handle_part(self, _data, ctype, filename, payload, _freq, headers): if ctype == handlers.CONTENT_START: - self.cloud_buf = {} + self._reset() return if ctype == handlers.CONTENT_END: self._write_cloud_config(self.cloud_buf) - self.cloud_buf = {} + self._reset() return try: self._merge_part(payload, headers) + self.file_names.append(filename) except: util.logexc(LOG, "Failed at merging in cloud config part from %s", filename) From 91ec2687ea46a97bce2e2719af721d5df119fe89 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 22 Nov 2012 19:45:43 -0800 Subject: [PATCH 08/16] Allow mergers to take options. --- cloudinit/handlers/cloud_config.py | 2 +- cloudinit/mergers/__init__.py | 37 ++++++++++++++++++------------ cloudinit/mergers/dict.py | 11 +++++++-- cloudinit/mergers/list.py | 29 +++++++++++++++-------- cloudinit/mergers/str.py | 12 +++++++--- 5 files changed, 60 insertions(+), 31 deletions(-) diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 9a8782bb..02a7ad9d 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -29,7 +29,7 @@ from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -DEF_MERGE_TYPE = "list+dict+str" +DEF_MERGE_TYPE = "list()+dict()+str()" MERGE_HEADER = 'Merge-Type' diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index b3e728b0..20658edc 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -16,11 +16,14 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . +import re from cloudinit import importer from cloudinit import log as logging from cloudinit import util +NAME_MTCH = re.compile(r"(^[a-zA-Z_][A-Za-z0-9_]*)\((.*?)\)$") + LOG = logging.getLogger(__name__) @@ -71,10 +74,8 @@ def _extract_merger_names(merge_how): names = [] for m_name in merge_how.split("+"): # Canonicalize the name (so that it can be found - # even when users alter it in various ways... 
+ # even when users alter it in various ways) m_name = m_name.lower().strip() - m_name = m_name.replace(" ", "_") - m_name = m_name.replace("\t", "_") m_name = m_name.replace("-", "_") if not m_name: continue @@ -82,23 +83,29 @@ def _extract_merger_names(merge_how): return names -def construct(merge_how, default_classes=None): - mergers = [] - merger_classes = [] - root = LookupMerger(mergers) - for m_name in _extract_merger_names(merge_how): +def construct(merge_how): + mergers_to_be = [] + for name in _extract_merger_names(merge_how): + match = NAME_MTCH.match(name) + if not match: + msg = "Matcher identifer '%s' is not in the right format" % (name) + raise ValueError(msg) + (m_name, m_ops) = match.groups() + m_ops = m_ops.strip().split(",") + m_ops = [m.strip().lower() for m in m_ops if m.strip()] merger_locs = importer.find_module(m_name, [__name__], ['Merger']) if not merger_locs: - msg = "Could not find merger named %s" % (m_name) + msg = "Could not find merger named '%s'" % (m_name) raise ImportError(msg) else: mod = importer.import_module(merger_locs[0]) - cls = getattr(mod, 'Merger') - merger_classes.append(cls) - if not merger_classes and default_classes: - merger_classes = default_classes - for m_class in merger_classes: - mergers.append(m_class(root)) + mod_attr = getattr(mod, 'Merger') + mergers_to_be.append((mod_attr, m_ops)) + # Now form them... + mergers = [] + root = LookupMerger(mergers) + for (attr, opts) in mergers_to_be: + mergers.append(attr(root, opts)) return root \ No newline at end of file diff --git a/cloudinit/mergers/dict.py b/cloudinit/mergers/dict.py index a0ffaa33..e7073bd9 100644 --- a/cloudinit/mergers/dict.py +++ b/cloudinit/mergers/dict.py @@ -18,8 +18,12 @@ class Merger(object): - def __init__(self, merger): + def __init__(self, merger, opts): self._merger = merger + self._overwrite = 'overwrite' in opts + + if opts and opts.lower().find("overwrite") != -1: + self._overwrite = True def _on_dict(self, value, merge_with): if not isinstance(merge_with, (dict)): @@ -27,7 +31,10 @@ class Merger(object): merged = dict(value) for (k, v) in merge_with.items(): if k in merged: - merged[k] = self._merger.merge(merged[k], v) + if not self._overwrite: + merged[k] = self._merger.merge(merged[k], v) + else: + merged[k] = v else: merged[k] = v return merged diff --git a/cloudinit/mergers/list.py b/cloudinit/mergers/list.py index ad1b9793..0c65d053 100644 --- a/cloudinit/mergers/list.py +++ b/cloudinit/mergers/list.py @@ -18,8 +18,10 @@ class Merger(object): - def __init__(self, merger): + def __init__(self, merger, opts): self._merger = merger + self._discard_non = 'discard_non_list' in opts + self._append = 'append' in opts def _on_tuple(self, value, merge_with): return self._on_list(list(value), merge_with) @@ -27,15 +29,22 @@ class Merger(object): def _on_list(self, value, merge_with): if isinstance(merge_with, (tuple, list)): new_value = list(value) - for m_v in merge_with: - m_am = 0 - for (i, o_v) in enumerate(new_value): - if m_v == o_v: - new_value[i] = self._merger.merge(o_v, m_v) - m_am += 1 - if m_am == 0: - new_value.append(m_v) + if self._append: + new_value.extend(merge_with) + else: + # Merge instead + for m_v in merge_with: + m_am = 0 + for (i, o_v) in enumerate(new_value): + if m_v == o_v: + new_value[i] = self._merger.merge(o_v, m_v) + m_am += 1 + if m_am == 0: + new_value.append(m_v) else: new_value = list(value) - new_value.append(merge_with) + if self._discard_non: + pass + else: + new_value.append(merge_with) return new_value diff --git 
a/cloudinit/mergers/str.py b/cloudinit/mergers/str.py index 7c3fa585..14bc46ec 100644 --- a/cloudinit/mergers/str.py +++ b/cloudinit/mergers/str.py @@ -18,11 +18,17 @@ class Merger(object): - def __init__(self, merger): - pass + def __init__(self, merger, opts): + self._append = 'append' in opts def _on_unicode(self, value, merge_with): return self._on_str(value, merge_with) def _on_str(self, value, merge_with): - return value + if not self._append: + return value + else: + if isinstance(value, (unicode)): + return value + unicode(merge_with) + else: + return value + str(merge_with) From eb0640957dfce842c0f841bfebd13c82a49a5f7e Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Thu, 22 Nov 2012 19:51:33 -0800 Subject: [PATCH 09/16] More cleanups. --- cloudinit/mergers/dict.py | 3 --- cloudinit/mergers/list.py | 11 ++++------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/cloudinit/mergers/dict.py b/cloudinit/mergers/dict.py index e7073bd9..bc392afa 100644 --- a/cloudinit/mergers/dict.py +++ b/cloudinit/mergers/dict.py @@ -21,9 +21,6 @@ class Merger(object): def __init__(self, merger, opts): self._merger = merger self._overwrite = 'overwrite' in opts - - if opts and opts.lower().find("overwrite") != -1: - self._overwrite = True def _on_dict(self, value, merge_with): if not isinstance(merge_with, (dict)): diff --git a/cloudinit/mergers/list.py b/cloudinit/mergers/list.py index 0c65d053..a848b8d6 100644 --- a/cloudinit/mergers/list.py +++ b/cloudinit/mergers/list.py @@ -21,15 +21,15 @@ class Merger(object): def __init__(self, merger, opts): self._merger = merger self._discard_non = 'discard_non_list' in opts - self._append = 'append' in opts + self._extend = 'extend' in opts def _on_tuple(self, value, merge_with): return self._on_list(list(value), merge_with) def _on_list(self, value, merge_with): + new_value = list(value) if isinstance(merge_with, (tuple, list)): - new_value = list(value) - if self._append: + if self._extend: new_value.extend(merge_with) else: # Merge instead @@ -42,9 +42,6 @@ class Merger(object): if m_am == 0: new_value.append(m_v) else: - new_value = list(value) - if self._discard_non: - pass - else: + if not self._discard_non: new_value.append(merge_with) return new_value From d5a0291a1e68e33fbd1894baf7fb0c354468ef05 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Tue, 5 Mar 2013 19:16:01 -0800 Subject: [PATCH 10/16] Add in a bunch of changes and tests. 
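The headline change here is that merger specifications now carry options in a "name(opts)" form and that parsing is split out from construction. A rough usage sketch (the sample dictionaries are made up; the expected results follow the new unit tests):

    from cloudinit import mergers

    # Parse the new option-carrying syntax into (name, options) pairs, e.g.
    # [('list', ['extend']), ('dict', []), ('str', ['append'])] ...
    parsed = mergers.string_extract_mergers("list(extend)+dict()+str(append)")
    # ... then build the chained merger from the parsed form.
    merger = mergers.construct(parsed)
    merged = merger.merge({'run': ['a'], 'msg': 'foo'},
                          {'run': ['b'], 'msg': 'bar'})
    # With these options lists are extended and strings appended:
    # merged == {'run': ['a', 'b'], 'msg': 'foobar'}
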
--- cloudinit/handlers/__init__.py | 15 ++- cloudinit/handlers/cloud_config.py | 89 ++++++++------ cloudinit/mergers/__init__.py | 59 +++++++-- cloudinit/mergers/dict.py | 11 ++ cloudinit/mergers/list.py | 21 ++-- cloudinit/mergers/str.py | 5 + tests/unittests/test__init__.py | 27 +++-- tests/unittests/test_merging.py | 187 +++++++++++++++++++++-------- tests/unittests/test_userdata.py | 80 ++++++++++-- 9 files changed, 359 insertions(+), 135 deletions(-) diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 566b61a7..63fdb948 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -87,7 +87,7 @@ class Handler(object): raise NotImplementedError() -def run_part(mod, data, filename, payload, headers, frequency): +def run_part(mod, data, filename, payload, frequency, headers): mod_freq = mod.frequency if not (mod_freq == PER_ALWAYS or (frequency == PER_INSTANCE and mod_freq == PER_INSTANCE)): @@ -98,8 +98,8 @@ def run_part(mod, data, filename, payload, headers, frequency): mod_ver = int(mod_ver) except (TypeError, ValueError, AttributeError): mod_ver = 1 + content_type = headers['Content-Type'] try: - content_type = headers['Content-Type'] LOG.debug("Calling handler %s (%s, %s, %s) with frequency %s", mod, content_type, filename, mod_ver, frequency) if mod_ver == 3: @@ -123,17 +123,19 @@ def run_part(mod, data, filename, payload, headers, frequency): def call_begin(mod, data, frequency): + # Create a fake header set headers = { 'Content-Type': CONTENT_START, } - run_part(mod, data, None, None, headers, frequency) + run_part(mod, data, None, None, frequency, headers) def call_end(mod, data, frequency): + # Create a fake header set headers = { 'Content-Type': CONTENT_END, } - run_part(mod, data, None, None, headers, frequency) + run_part(mod, data, None, None, frequency, headers) def walker_handle_handler(pdata, _ctype, _filename, payload): @@ -191,12 +193,12 @@ def walker_callback(data, filename, payload, headers): handlers = data['handlers'] if content_type in handlers: run_part(handlers[content_type], data['data'], filename, - payload, headers, data['frequency']) + payload, data['frequency'], headers) elif payload: # Extract the first line or 24 bytes for displaying in the log start = _extract_first_or_bytes(payload, 24) details = "'%s...'" % (_escape_string(start)) - if ctype == NOT_MULTIPART_TYPE: + if content_type == NOT_MULTIPART_TYPE: LOG.warning("Unhandled non-multipart (%s) userdata: %s", content_type, details) else: @@ -224,6 +226,7 @@ def walk(msg, callback, data): filename = PART_FN_TPL % (partnum) headers = dict(part) + LOG.debug(headers) headers['Content-Type'] = ctype callback(data, filename, part.get_payload(decode=True), headers) partnum = partnum + 1 diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 02a7ad9d..d458dee2 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -29,16 +29,19 @@ from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -DEF_MERGE_TYPE = "list()+dict()+str()" +DEF_MERGE_TYPE = "list(extend)+dict()+str(append)" MERGE_HEADER = 'Merge-Type' class CloudConfigPartHandler(handlers.Handler): def __init__(self, paths, **_kwargs): handlers.Handler.__init__(self, PER_ALWAYS, version=3) - self.cloud_buf = {} + self.cloud_buf = None self.cloud_fn = paths.get_ipath("cloud_config") self.file_names = [] + self.mergers = [ + mergers.string_extract_mergers(DEF_MERGE_TYPE), + ] def list_types(self): return [ @@ 
-48,50 +51,64 @@ class CloudConfigPartHandler(handlers.Handler): def _write_cloud_config(self, buf): if not self.cloud_fn: return - # Write the combined & merged dictionary/yaml out - lines = [ - "#cloud-config", - '', - ] - # Write which files we merged from + # Capture which files we merged from... + file_lines = [] if self.file_names: - lines.append("# from %s files" % (len(self.file_names))) + file_lines.append("# from %s files" % (len(self.file_names))) for fn in self.file_names: - lines.append("# %s" % (fn)) - lines.append("") - lines.append(util.yaml_dumps(self.cloud_buf)) + file_lines.append("# %s" % (fn)) + file_lines.append("") + if self.cloud_buf is not None: + lines = [ + "#cloud-config", + '', + ] + lines.extend(file_lines) + lines.append(util.yaml_dumps(self.cloud_buf)) + else: + lines = [] util.write_file(self.cloud_fn, "\n".join(lines), 0600) - def _merge_header_extract(self, payload_yaml): - merge_header_yaml = '' - for k in [MERGE_HEADER, MERGE_HEADER.lower(), - MERGE_HEADER.lower().replace("-", "_")]: - if k in payload_yaml: - merge_header_yaml = str(payload_yaml[k]) + def _extract_mergers(self, payload, headers): + merge_header_headers = '' + for h in [MERGE_HEADER, 'X-%s' % (MERGE_HEADER)]: + tmp_h = headers.get(h, '') + if tmp_h: + merge_header_headers = tmp_h break - return merge_header_yaml - - def _merge_part(self, payload, headers): - merge_header_headers = headers.get(MERGE_HEADER, '') - payload_yaml = util.load_yaml(payload) - merge_how = '' # Select either the merge-type from the content # or the merge type from the headers or default to our own set - # if neither exists (or is empty) from the later - merge_header_yaml = self._merge_header_extract(payload_yaml) - for merge_i in [merge_header_yaml, merge_header_headers]: - merge_i = merge_i.strip().lower() - if merge_i: - merge_how = merge_i - break - if not merge_how: - merge_how = DEF_MERGE_TYPE - merger = mergers.construct(merge_how) - self.cloud_buf = merger.merge(self.cloud_buf, payload_yaml) + # if neither exists (or is empty) from the later. + payload_yaml = util.load_yaml(payload) + mergers_yaml = mergers.dict_extract_mergers(payload_yaml) + mergers_header = mergers.string_extract_mergers(merge_header_headers) + all_mergers = [] + all_mergers.extend(mergers_yaml) + all_mergers.extend(mergers_header) + if not all_mergers: + all_mergers = mergers.string_extract_mergers(DEF_MERGE_TYPE) + return all_mergers + + def _merge_part(self, payload, headers): + next_mergers = self._extract_mergers(payload, headers) + # Use the merger list from the last call, since it is the one + # that will be defining how to merge with the next payload. + curr_mergers = list(self.mergers[-1]) + LOG.debug("Merging with %s", curr_mergers) + self.mergers.append(next_mergers) + merger = mergers.construct(curr_mergers) + if self.cloud_buf is None: + # First time through, merge with an empty dict... 
+ self.cloud_buf = {} + self.cloud_buf = merger.merge(self.cloud_buf, + util.load_yaml(payload)) def _reset(self): self.file_names = [] - self.cloud_buf = {} + self.cloud_buf = None + self.mergers = [ + mergers.string_extract_mergers(DEF_MERGE_TYPE), + ] def handle_part(self, _data, ctype, filename, payload, _freq, headers): if ctype == handlers.CONTENT_START: diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index 20658edc..4a112165 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -34,6 +34,13 @@ class UnknownMerger(object): def _handle_unknown(self, meth_wanted, value, merge_with): return value + # This merging will attempt to look for a '_on_X' method + # in our own object for a given object Y with type X, + # if found it will be called to perform the merge of a source + # object and a object to merge_with. + # + # If not found the merge will be given to a '_handle_unknown' + # function which can decide what to do wit the 2 values. def merge(self, source, merge_with): type_name = util.obj_name(source) type_name = type_name.lower() @@ -56,6 +63,11 @@ class LookupMerger(UnknownMerger): else: self._lookups = lookups + # For items which can not be merged by the parent this object + # will lookup in a internally maintained set of objects and + # find which one of those objects can perform the merge. If + # any of the contained objects have the needed method, they + # will be called to perform the merge. def _handle_unknown(self, meth_wanted, value, merge_with): meth = None for merger in self._lookups: @@ -70,8 +82,33 @@ class LookupMerger(UnknownMerger): return meth(value, merge_with) -def _extract_merger_names(merge_how): - names = [] +def dict_extract_mergers(config): + parsed_mergers = [] + raw_mergers = config.get('merger_how') + if raw_mergers is None: + raw_mergers = config.get('merge_type') + if raw_mergers is None: + return parsed_mergers + if isinstance(raw_mergers, (str, basestring)): + return string_extract_mergers(raw_mergers) + for m in raw_mergers: + if isinstance(m, (dict)): + name = m['name'] + name = name.replace("-", "_").strip() + opts = m['settings'] + else: + name = m[0] + if len(m) >= 2: + opts = m[1:] + else: + opts = [] + if name: + parsed_mergers.append((name, opts)) + return parsed_mergers + + +def string_extract_mergers(merge_how): + parsed_mergers = [] for m_name in merge_how.split("+"): # Canonicalize the name (so that it can be found # even when users alter it in various ways) @@ -79,20 +116,20 @@ def _extract_merger_names(merge_how): m_name = m_name.replace("-", "_") if not m_name: continue - names.append(m_name) - return names - - -def construct(merge_how): - mergers_to_be = [] - for name in _extract_merger_names(merge_how): - match = NAME_MTCH.match(name) + match = NAME_MTCH.match(m_name) if not match: - msg = "Matcher identifer '%s' is not in the right format" % (name) + msg = "Matcher identifer '%s' is not in the right format" % (m_name) raise ValueError(msg) (m_name, m_ops) = match.groups() m_ops = m_ops.strip().split(",") m_ops = [m.strip().lower() for m in m_ops if m.strip()] + parsed_mergers.append((m_name, m_ops)) + return parsed_mergers + + +def construct(parsed_mergers): + mergers_to_be = [] + for (m_name, m_ops) in parsed_mergers: merger_locs = importer.find_module(m_name, [__name__], ['Merger']) diff --git a/cloudinit/mergers/dict.py b/cloudinit/mergers/dict.py index bc392afa..45a7d3a5 100644 --- a/cloudinit/mergers/dict.py +++ b/cloudinit/mergers/dict.py @@ -22,6 +22,17 @@ class 
Merger(object): self._merger = merger self._overwrite = 'overwrite' in opts + # This merging algorithm will attempt to merge with + # another dictionary, on encountering any other type of object + # it will not merge with said object, but will instead return + # the original value + # + # On encountering a dictionary, it will create a new dictionary + # composed of the original and the one to merge with, if 'overwrite' + # is enabled then keys that exist in the original will be overwritten + # by keys in the one to merge with (and associated values). Otherwise + # if not in overwrite mode the 2 conflicting keys themselves will + # be merged. def _on_dict(self, value, merge_with): if not isinstance(merge_with, (dict)): return value diff --git a/cloudinit/mergers/list.py b/cloudinit/mergers/list.py index a848b8d6..a56ff007 100644 --- a/cloudinit/mergers/list.py +++ b/cloudinit/mergers/list.py @@ -26,21 +26,24 @@ class Merger(object): def _on_tuple(self, value, merge_with): return self._on_list(list(value), merge_with) + # On encountering a list or tuple type this action will be applied + # a new list will be returned, if the value to merge with is itself + # a list and we have been told to 'extend', then the value here will + # be extended with the other list. If in 'extend' mode then we will + # attempt to merge instead, which means that values from the list + # to merge with will replace values in te original list (they will + # also be merged recursively). + # + # If the value to merge with is not a list, and we are set to discared + # then no modifications will take place, otherwise we will just append + # the value to merge with onto the end of our own list. def _on_list(self, value, merge_with): new_value = list(value) if isinstance(merge_with, (tuple, list)): if self._extend: new_value.extend(merge_with) else: - # Merge instead - for m_v in merge_with: - m_am = 0 - for (i, o_v) in enumerate(new_value): - if m_v == o_v: - new_value[i] = self._merger.merge(o_v, m_v) - m_am += 1 - if m_am == 0: - new_value.append(m_v) + return new_value else: if not self._discard_non: new_value.append(merge_with) diff --git a/cloudinit/mergers/str.py b/cloudinit/mergers/str.py index 14bc46ec..f1534c5b 100644 --- a/cloudinit/mergers/str.py +++ b/cloudinit/mergers/str.py @@ -21,9 +21,14 @@ class Merger(object): def __init__(self, merger, opts): self._append = 'append' in opts + # On encountering a unicode object to merge value with + # we will for now just proxy into the string method to let it handle it. def _on_unicode(self, value, merge_with): return self._on_str(value, merge_with) + # On encountering a string object to merge with we will + # perform the following action, if appending we will + # merge them together, otherwise we will just return value. 
def _on_str(self, value, merge_with): if not self._append: return value diff --git a/tests/unittests/test__init__.py b/tests/unittests/test__init__.py index ac082076..7924755a 100644 --- a/tests/unittests/test__init__.py +++ b/tests/unittests/test__init__.py @@ -22,8 +22,10 @@ class FakeModule(handlers.Handler): def list_types(self): return self.types - def _handle_part(self, data, ctype, filename, payload, frequency): + def handle_part(self, data, ctype, filename, payload, frequency): pass + + class TestWalkerHandleHandler(MockerTestCase): @@ -103,6 +105,9 @@ class TestHandlerHandlePart(MockerTestCase): self.filename = "fake filename" self.payload = "fake payload" self.frequency = settings.PER_INSTANCE + self.headers = { + 'Content-Type': self.ctype, + } def test_normal_version_1(self): """ @@ -118,8 +123,8 @@ class TestHandlerHandlePart(MockerTestCase): self.payload) self.mocker.replay() - handlers.run_part(mod_mock, self.data, self.ctype, self.filename, - self.payload, self.frequency) + handlers.run_part(mod_mock, self.data, self.filename, + self.payload, self.frequency, self.headers) def test_normal_version_2(self): """ @@ -135,8 +140,8 @@ class TestHandlerHandlePart(MockerTestCase): self.payload, self.frequency) self.mocker.replay() - handlers.run_part(mod_mock, self.data, self.ctype, self.filename, - self.payload, self.frequency) + handlers.run_part(mod_mock, self.data, self.filename, + self.payload, self.frequency, self.headers) def test_modfreq_per_always(self): """ @@ -152,8 +157,8 @@ class TestHandlerHandlePart(MockerTestCase): self.payload) self.mocker.replay() - handlers.run_part(mod_mock, self.data, self.ctype, self.filename, - self.payload, self.frequency) + handlers.run_part(mod_mock, self.data, self.filename, + self.payload, self.frequency, self.headers) def test_no_handle_when_modfreq_once(self): """C{handle_part} is not called if frequency is once.""" @@ -163,8 +168,8 @@ class TestHandlerHandlePart(MockerTestCase): self.mocker.result(settings.PER_ONCE) self.mocker.replay() - handlers.run_part(mod_mock, self.data, self.ctype, self.filename, - self.payload, self.frequency) + handlers.run_part(mod_mock, self.data, self.filename, + self.payload, self.frequency, self.headers) def test_exception_is_caught(self): """Exceptions within C{handle_part} are caught and logged.""" @@ -178,8 +183,8 @@ class TestHandlerHandlePart(MockerTestCase): self.mocker.throw(Exception()) self.mocker.replay() - handlers.run_part(mod_mock, self.data, self.ctype, self.filename, - self.payload, self.frequency) + handlers.run_part(mod_mock, self.data, self.filename, + self.payload, self.frequency, self.headers) class TestCmdlineUrl(MockerTestCase): diff --git a/tests/unittests/test_merging.py b/tests/unittests/test_merging.py index 0037b966..fa7ee8e4 100644 --- a/tests/unittests/test_merging.py +++ b/tests/unittests/test_merging.py @@ -1,62 +1,143 @@ -from mocker import MockerTestCase +import os -from cloudinit import util +from tests.unittests import helpers + +from cloudinit import mergers -class TestMergeDict(MockerTestCase): - def test_simple_merge(self): - """Test simple non-conflict merge.""" - source = {"key1": "value1"} - candidate = {"key2": "value2"} - result = util.mergedict(source, candidate) - self.assertEqual({"key1": "value1", "key2": "value2"}, result) +class TestSimpleRun(helpers.MockerTestCase): + def test_basic_merge(self): + source = { + 'Blah': ['blah2'], + 'Blah3': 'c', + } + merge_with = { + 'Blah2': ['blah3'], + 'Blah3': 'b', + 'Blah': ['123'], + } + # Basic merge should not do 
thing special + merge_how = "list()+dict()+str()" + merger_set = mergers.string_extract_mergers(merge_how) + self.assertEquals(3, len(merger_set)) + merger = mergers.construct(merger_set) + merged = merger.merge(source, merge_with) + self.assertEquals(merged['Blah'], ['blah2']) + self.assertEquals(merged['Blah2'], ['blah3']) + self.assertEquals(merged['Blah3'], 'c') - def test_nested_merge(self): - """Test nested merge.""" - source = {"key1": {"key1.1": "value1.1"}} - candidate = {"key1": {"key1.2": "value1.2"}} - result = util.mergedict(source, candidate) - self.assertEqual( - {"key1": {"key1.1": "value1.1", "key1.2": "value1.2"}}, result) + def test_dict_overwrite(self): + source = { + 'Blah': ['blah2'], + } + merge_with = { + 'Blah': ['123'], + } + # Now lets try a dict overwrite + merge_how = "list()+dict(overwrite)+str()" + merger_set = mergers.string_extract_mergers(merge_how) + self.assertEquals(3, len(merger_set)) + merger = mergers.construct(merger_set) + merged = merger.merge(source, merge_with) + self.assertEquals(merged['Blah'], ['123']) - def test_merge_does_not_override(self): - """Test that candidate doesn't override source.""" - source = {"key1": "value1", "key2": "value2"} - candidate = {"key1": "value2", "key2": "NEW VALUE"} - result = util.mergedict(source, candidate) - self.assertEqual(source, result) + def test_string_append(self): + source = { + 'Blah': 'blah2', + } + merge_with = { + 'Blah': '345', + } + merge_how = "list()+dict()+str(append)" + merger_set = mergers.string_extract_mergers(merge_how) + self.assertEquals(3, len(merger_set)) + merger = mergers.construct(merger_set) + merged = merger.merge(source, merge_with) + self.assertEquals(merged['Blah'], 'blah2345') - def test_empty_candidate(self): - """Test empty candidate doesn't change source.""" - source = {"key": "value"} - candidate = {} - result = util.mergedict(source, candidate) - self.assertEqual(source, result) + def test_list_extend(self): + source = ['abc'] + merge_with = ['123'] + merge_how = "list(extend)+dict()+str()" + merger_set = mergers.string_extract_mergers(merge_how) + self.assertEquals(3, len(merger_set)) + merger = mergers.construct(merger_set) + merged = merger.merge(source, merge_with) + self.assertEquals(merged, ['abc', '123']) - def test_empty_source(self): - """Test empty source is replaced by candidate.""" - source = {} - candidate = {"key": "value"} - result = util.mergedict(source, candidate) - self.assertEqual(candidate, result) + def test_deep_merge(self): + source = { + 'a': [1, 'b', 2], + 'b': 'blahblah', + 'c': { + 'e': [1, 2, 3], + 'f': 'bigblobof', + 'iamadict': { + 'ok': 'ok', + } + }, + 'run': [ + 'runme', + 'runme2', + ], + 'runmereally': [ + 'e', ['a'], 'd', + ], + } + merge_with = { + 'a': ['e', 'f', 'g'], + 'b': 'more', + 'c': { + 'a': 'b', + 'f': 'stuff', + }, + 'run': [ + 'morecmd', + 'moremoremore', + ], + 'runmereally': [ + 'blah', ['b'], 'e', + ], + } + merge_how = "list(extend)+dict()+str(append)" + merger_set = mergers.string_extract_mergers(merge_how) + self.assertEquals(3, len(merger_set)) + merger = mergers.construct(merger_set) + merged = merger.merge(source, merge_with) + self.assertEquals(merged['a'], [1, 'b', 2, 'e', 'f', 'g']) + self.assertEquals(merged['b'], 'blahblahmore') + self.assertEquals(merged['c']['f'], 'bigblobofstuff') + self.assertEquals(merged['run'], ['runme', 'runme2', 'morecmd', 'moremoremore']) + self.assertEquals(merged['runmereally'], ['e', ['a'], 'd', 'blah', ['b'], 'e']) - def test_non_dict_candidate(self): - """Test non-dict 
candidate is discarded.""" - source = {"key": "value"} - candidate = "not a dict" - result = util.mergedict(source, candidate) - self.assertEqual(source, result) + def test_dict_overwrite_layered(self): + source = { + 'Blah3': { + 'f': '3', + 'g': { + 'a': 'b', + } + } + } + merge_with = { + 'Blah3': { + 'e': '2', + 'g': { + 'e': 'f', + } + } + } + merge_how = "list()+dict()+str()" + merger_set = mergers.string_extract_mergers(merge_how) + self.assertEquals(3, len(merger_set)) + merger = mergers.construct(merger_set) + merged = merger.merge(source, merge_with) + self.assertEquals(merged['Blah3'], { + 'e': '2', + 'f': '3', + 'g': { + 'a': 'b', + 'e': 'f', + } + }) - def test_non_dict_source(self): - """Test non-dict source is not modified with a dict candidate.""" - source = "not a dict" - candidate = {"key": "value"} - result = util.mergedict(source, candidate) - self.assertEqual(source, result) - - def test_neither_dict(self): - """Test if neither candidate or source is dict source wins.""" - source = "source" - candidate = "candidate" - result = util.mergedict(source, candidate) - self.assertEqual(source, result) diff --git a/tests/unittests/test_userdata.py b/tests/unittests/test_userdata.py index 82a4c555..9e1fed7e 100644 --- a/tests/unittests/test_userdata.py +++ b/tests/unittests/test_userdata.py @@ -9,12 +9,17 @@ from email.mime.base import MIMEBase from mocker import MockerTestCase +from cloudinit import handlers +from cloudinit import helpers as c_helpers from cloudinit import log from cloudinit import sources from cloudinit import stages +from cloudinit import util INSTANCE_ID = "i-testing" +from tests.unittests import helpers + class FakeDataSource(sources.DataSource): @@ -26,22 +31,16 @@ class FakeDataSource(sources.DataSource): # FIXME: these tests shouldn't be checking log output?? # Weirddddd... - - -class TestConsumeUserData(MockerTestCase): +class TestConsumeUserData(helpers.FilesystemMockingTestCase): def setUp(self): - MockerTestCase.setUp(self) - # Replace the write so no actual files - # get written out... 
- self.mock_write = self.mocker.replace("cloudinit.util.write_file", - passthrough=False) + helpers.FilesystemMockingTestCase.setUp(self) self._log = None self._log_file = None self._log_handler = None def tearDown(self): - MockerTestCase.tearDown(self) + helpers.FilesystemMockingTestCase.tearDown(self) if self._log_handler and self._log: self._log.removeHandler(self._log_handler) @@ -53,12 +52,71 @@ class TestConsumeUserData(MockerTestCase): self._log.addHandler(self._log_handler) return log_file + def test_merging_cloud_config(self): + blob = ''' +#cloud-config +a: b +e: f +run: + - b + - c +''' + message1 = MIMEBase("text", "cloud-config") + message1['Merge-Type'] = 'dict()+list(extend)+str(append)' + message1.set_payload(blob) + + blob2 = ''' +#cloud-config +a: e +e: g +run: + - stuff + - morestuff +''' + message2 = MIMEBase("text", "cloud-config") + message2['Merge-Type'] = 'dict()+list(extend)+str()' + message2.set_payload(blob2) + + blob3 = ''' +#cloud-config +e: + - 1 + - 2 + - 3 +''' + message3 = MIMEBase("text", "cloud-config") + message3['Merge-Type'] = 'dict()+list()+str()' + message3.set_payload(blob3) + + messages = [message1, message2, message3] + + paths = c_helpers.Paths({}, ds=FakeDataSource('')) + cloud_cfg = handlers.cloud_config.CloudConfigPartHandler(paths) + + new_root = self.makeDir() + self.patchUtils(new_root) + self.patchOS(new_root) + cloud_cfg.handle_part(None, handlers.CONTENT_START, None, None, None, None) + for i, m in enumerate(messages): + headers = dict(m) + fn = "part-%s" % (i + 1) + payload = m.get_payload(decode=True) + cloud_cfg.handle_part(None, headers['Content-Type'], + fn, payload, None, headers) + cloud_cfg.handle_part(None, handlers.CONTENT_END, None, None, None, None) + contents = util.load_file(paths.get_ipath('cloud_config')) + contents = util.load_yaml(contents) + self.assertEquals(contents['run'], ['b', 'c', 'stuff', 'morestuff']) + self.assertEquals(contents['a'], 'be') + self.assertEquals(contents['e'], 'fg') + def test_unhandled_type_warning(self): """Raw text without magic is ignored but shows warning.""" ci = stages.Init() data = "arbitrary text\n" ci.datasource = FakeDataSource(data) + self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) self.mocker.replay() @@ -76,6 +134,7 @@ class TestConsumeUserData(MockerTestCase): message.set_payload("Just text") ci.datasource = FakeDataSource(message.as_string()) + self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) self.mocker.replay() @@ -93,6 +152,7 @@ class TestConsumeUserData(MockerTestCase): ci.datasource = FakeDataSource(script) outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") + self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) self.mock_write(outpath, script, 0700) self.mocker.replay() @@ -111,6 +171,7 @@ class TestConsumeUserData(MockerTestCase): ci.datasource = FakeDataSource(message.as_string()) outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") + self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) self.mock_write(outpath, script, 0700) self.mocker.replay() @@ -129,6 +190,7 @@ class TestConsumeUserData(MockerTestCase): ci.datasource = 
FakeDataSource(message.as_string()) outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") + self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) self.mock_write(outpath, script, 0700) self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) self.mocker.replay() From 6596b4585a98c1e8c61760b94d324a9919f2aae0 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Wed, 6 Mar 2013 19:24:05 -0800 Subject: [PATCH 11/16] Continue working on merging code. --- cloudinit/config/cc_landscape.py | 3 +- cloudinit/config/cc_mounts.py | 3 +- cloudinit/distros/__init__.py | 15 +++++----- cloudinit/handlers/__init__.py | 3 +- cloudinit/handlers/cloud_config.py | 15 ++++------ cloudinit/helpers.py | 3 +- cloudinit/mergers/__init__.py | 13 +++++++-- cloudinit/sources/DataSourceAltCloud.py | 5 ++-- cloudinit/sources/DataSourceCloudStack.py | 3 -- cloudinit/sources/DataSourceConfigDrive.py | 4 ++- cloudinit/sources/DataSourceEc2.py | 3 -- cloudinit/sources/DataSourceMAAS.py | 3 +- cloudinit/sources/DataSourceNoCloud.py | 5 ++-- cloudinit/sources/DataSourceNone.py | 3 -- cloudinit/sources/DataSourceOVF.py | 3 +- cloudinit/sources/__init__.py | 10 +++++-- cloudinit/stages.py | 9 +++--- cloudinit/type_utils.py | 34 ++++++++++++++++++++++ cloudinit/util.py | 33 +++++++-------------- tests/unittests/test_userdata.py | 4 ++- 20 files changed, 104 insertions(+), 70 deletions(-) create mode 100644 cloudinit/type_utils.py diff --git a/cloudinit/config/cc_landscape.py b/cloudinit/config/cc_landscape.py index 02610dd0..6734efee 100644 --- a/cloudinit/config/cc_landscape.py +++ b/cloudinit/config/cc_landscape.py @@ -24,6 +24,7 @@ from StringIO import StringIO from configobj import ConfigObj +from cloudinit import type_utils from cloudinit import util from cloudinit.settings import PER_INSTANCE @@ -58,7 +59,7 @@ def handle(_name, cfg, cloud, log, _args): if not isinstance(ls_cloudcfg, (dict)): raise RuntimeError(("'landscape' key existed in config," " but not a dictionary type," - " is a %s instead"), util.obj_name(ls_cloudcfg)) + " is a %s instead"), type_utils.obj_name(ls_cloudcfg)) if not ls_cloudcfg: return diff --git a/cloudinit/config/cc_mounts.py b/cloudinit/config/cc_mounts.py index cb772c86..6ebe563d 100644 --- a/cloudinit/config/cc_mounts.py +++ b/cloudinit/config/cc_mounts.py @@ -22,6 +22,7 @@ from string import whitespace # pylint: disable=W0402 import re +from cloudinit import type_utils from cloudinit import util # Shortname matches 'sda', 'sda1', 'xvda', 'hda', 'sdb', xvdb, vda, vdd1 @@ -60,7 +61,7 @@ def handle(_name, cfg, cloud, log, _args): # skip something that wasn't a list if not isinstance(cfgmnt[i], list): log.warn("Mount option %s not a list, got a %s instead", - (i + 1), util.obj_name(cfgmnt[i])) + (i + 1), type_utils.obj_name(cfgmnt[i])) continue startname = str(cfgmnt[i][0]) diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py index 6a684b89..eeea6af1 100644 --- a/cloudinit/distros/__init__.py +++ b/cloudinit/distros/__init__.py @@ -31,6 +31,7 @@ import re from cloudinit import importer from cloudinit import log as logging from cloudinit import ssh_util +from cloudinit import type_utils from cloudinit import util from cloudinit.distros.parsers import hosts @@ -427,7 +428,7 @@ class Distro(object): lines.append("%s %s" % (user, rules)) else: msg = "Can not create sudoers rule addition with type %r" - raise TypeError(msg % (util.obj_name(rules))) + raise TypeError(msg % (type_utils.obj_name(rules))) content = "\n".join(lines) 
content += "\n" # trailing newline @@ -550,7 +551,7 @@ def _normalize_groups(grp_cfg): c_grp_cfg[k] = [v] else: raise TypeError("Bad group member type %s" % - util.obj_name(v)) + type_utils.obj_name(v)) else: if isinstance(v, (list)): c_grp_cfg[k].extend(v) @@ -558,13 +559,13 @@ def _normalize_groups(grp_cfg): c_grp_cfg[k].append(v) else: raise TypeError("Bad group member type %s" % - util.obj_name(v)) + type_utils.obj_name(v)) elif isinstance(i, (str, basestring)): if i not in c_grp_cfg: c_grp_cfg[i] = [] else: raise TypeError("Unknown group name type %s" % - util.obj_name(i)) + type_utils.obj_name(i)) grp_cfg = c_grp_cfg groups = {} if isinstance(grp_cfg, (dict)): @@ -573,7 +574,7 @@ def _normalize_groups(grp_cfg): else: raise TypeError(("Group config must be list, dict " " or string types only and not %s") % - util.obj_name(grp_cfg)) + type_utils.obj_name(grp_cfg)) return groups @@ -604,7 +605,7 @@ def _normalize_users(u_cfg, def_user_cfg=None): ad_ucfg.append(v) else: raise TypeError(("Unmappable user value type %s" - " for key %s") % (util.obj_name(v), k)) + " for key %s") % (type_utils.obj_name(v), k)) u_cfg = ad_ucfg elif isinstance(u_cfg, (str, basestring)): u_cfg = util.uniq_merge_sorted(u_cfg) @@ -629,7 +630,7 @@ def _normalize_users(u_cfg, def_user_cfg=None): else: raise TypeError(("User config must be dictionary/list " " or string types only and not %s") % - util.obj_name(user_config)) + type_utils.obj_name(user_config)) # Ensure user options are in the right python friendly format if users: diff --git a/cloudinit/handlers/__init__.py b/cloudinit/handlers/__init__.py index 63fdb948..924463ce 100644 --- a/cloudinit/handlers/__init__.py +++ b/cloudinit/handlers/__init__.py @@ -27,6 +27,7 @@ from cloudinit.settings import (PER_ALWAYS, PER_INSTANCE, FREQUENCIES) from cloudinit import importer from cloudinit import log as logging +from cloudinit import type_utils from cloudinit import util LOG = logging.getLogger(__name__) @@ -76,7 +77,7 @@ class Handler(object): self.frequency = frequency def __repr__(self): - return "%s: [%s]" % (util.obj_name(self), self.list_types()) + return "%s: [%s]" % (type_utils.obj_name(self), self.list_types()) @abc.abstractmethod def list_types(self): diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index d458dee2..5f519f78 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -29,8 +29,8 @@ from cloudinit.settings import (PER_ALWAYS) LOG = logging.getLogger(__name__) -DEF_MERGE_TYPE = "list(extend)+dict()+str(append)" MERGE_HEADER = 'Merge-Type' +DEF_MERGERS = mergers.default_mergers() class CloudConfigPartHandler(handlers.Handler): @@ -39,9 +39,7 @@ class CloudConfigPartHandler(handlers.Handler): self.cloud_buf = None self.cloud_fn = paths.get_ipath("cloud_config") self.file_names = [] - self.mergers = [ - mergers.string_extract_mergers(DEF_MERGE_TYPE), - ] + self.mergers = [DEF_MERGERS] def list_types(self): return [ @@ -59,6 +57,7 @@ class CloudConfigPartHandler(handlers.Handler): file_lines.append("# %s" % (fn)) file_lines.append("") if self.cloud_buf is not None: + # Something was actually gathered.... 
lines = [ "#cloud-config", '', @@ -86,7 +85,7 @@ class CloudConfigPartHandler(handlers.Handler): all_mergers.extend(mergers_yaml) all_mergers.extend(mergers_header) if not all_mergers: - all_mergers = mergers.string_extract_mergers(DEF_MERGE_TYPE) + all_mergers = DEF_MERGERS return all_mergers def _merge_part(self, payload, headers): @@ -94,7 +93,7 @@ class CloudConfigPartHandler(handlers.Handler): # Use the merger list from the last call, since it is the one # that will be defining how to merge with the next payload. curr_mergers = list(self.mergers[-1]) - LOG.debug("Merging with %s", curr_mergers) + LOG.debug("Merging by applying %s", curr_mergers) self.mergers.append(next_mergers) merger = mergers.construct(curr_mergers) if self.cloud_buf is None: @@ -106,9 +105,7 @@ class CloudConfigPartHandler(handlers.Handler): def _reset(self): self.file_names = [] self.cloud_buf = None - self.mergers = [ - mergers.string_extract_mergers(DEF_MERGE_TYPE), - ] + self.mergers = [DEF_MERGERS] def handle_part(self, _data, ctype, filename, payload, _freq, headers): if ctype == handlers.CONTENT_START: diff --git a/cloudinit/helpers.py b/cloudinit/helpers.py index 2077401c..a4e6fb03 100644 --- a/cloudinit/helpers.py +++ b/cloudinit/helpers.py @@ -32,6 +32,7 @@ from cloudinit.settings import (PER_INSTANCE, PER_ALWAYS, PER_ONCE, CFG_ENV_NAME) from cloudinit import log as logging +from cloudinit import type_utils from cloudinit import util LOG = logging.getLogger(__name__) @@ -68,7 +69,7 @@ class FileLock(object): self.fn = fn def __str__(self): - return "<%s using file %r>" % (util.obj_name(self), self.fn) + return "<%s using file %r>" % (type_utils.obj_name(self), self.fn) def canon_sem_name(name): diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index 4a112165..453426af 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -20,11 +20,12 @@ import re from cloudinit import importer from cloudinit import log as logging -from cloudinit import util +from cloudinit import type_utils NAME_MTCH = re.compile(r"(^[a-zA-Z_][A-Za-z0-9_]*)\((.*?)\)$") LOG = logging.getLogger(__name__) +DEF_MERGE_TYPE = "list(extend)+dict()+str(append)" class UnknownMerger(object): @@ -42,7 +43,7 @@ class UnknownMerger(object): # If not found the merge will be given to a '_handle_unknown' # function which can decide what to do wit the 2 values. 
def merge(self, source, merge_with): - type_name = util.obj_name(source) + type_name = type_utils.obj_name(source) type_name = type_name.lower() method_name = "_on_%s" % (type_name) meth = None @@ -127,6 +128,10 @@ def string_extract_mergers(merge_how): return parsed_mergers +def default_mergers(): + return tuple(string_extract_mergers(DEF_MERGE_TYPE)) + + def construct(parsed_mergers): mergers_to_be = [] for (m_name, m_ops) in parsed_mergers: @@ -145,4 +150,6 @@ def construct(parsed_mergers): root = LookupMerger(mergers) for (attr, opts) in mergers_to_be: mergers.append(attr(root, opts)) - return root \ No newline at end of file + return root + + diff --git a/cloudinit/sources/DataSourceAltCloud.py b/cloudinit/sources/DataSourceAltCloud.py index 9812bdcb..64548d43 100644 --- a/cloudinit/sources/DataSourceAltCloud.py +++ b/cloudinit/sources/DataSourceAltCloud.py @@ -30,6 +30,7 @@ import os.path from cloudinit import log as logging from cloudinit import sources from cloudinit import util + from cloudinit.util import ProcessExecutionError LOG = logging.getLogger(__name__) @@ -91,8 +92,8 @@ class DataSourceAltCloud(sources.DataSource): self.supported_seed_starts = ("/", "file://") def __str__(self): - mstr = "%s [seed=%s]" % (util.obj_name(self), self.seed) - return mstr + root = sources.DataSource.__str__(self) + return "%s [seed=%s]" % (root, self.seed) def get_cloud_type(self): ''' diff --git a/cloudinit/sources/DataSourceCloudStack.py b/cloudinit/sources/DataSourceCloudStack.py index 076dba5a..c0e1a23c 100644 --- a/cloudinit/sources/DataSourceCloudStack.py +++ b/cloudinit/sources/DataSourceCloudStack.py @@ -59,9 +59,6 @@ class DataSourceCloudStack(sources.DataSource): return gw return None - def __str__(self): - return util.obj_name(self) - def _get_url_settings(self): mcfg = self.ds_cfg if not mcfg: diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py index c7826851..46abd772 100644 --- a/cloudinit/sources/DataSourceConfigDrive.py +++ b/cloudinit/sources/DataSourceConfigDrive.py @@ -51,7 +51,9 @@ class DataSourceConfigDrive(sources.DataSource): self.ec2_metadata = None def __str__(self): - mstr = "%s [%s,ver=%s]" % (util.obj_name(self), self.dsmode, + root = sources.DataSource.__str__(self) + mstr = "%s [%s,ver=%s]" % (root, + self.dsmode, self.version) mstr += "[source=%s]" % (self.source) return mstr diff --git a/cloudinit/sources/DataSourceEc2.py b/cloudinit/sources/DataSourceEc2.py index 2db53446..f010e640 100644 --- a/cloudinit/sources/DataSourceEc2.py +++ b/cloudinit/sources/DataSourceEc2.py @@ -49,9 +49,6 @@ class DataSourceEc2(sources.DataSource): self.seed_dir = os.path.join(paths.seed_dir, "ec2") self.api_ver = DEF_MD_VERSION - def __str__(self): - return util.obj_name(self) - def get_data(self): seed_ret = {} if util.read_optional_seed(seed_ret, base=(self.seed_dir + "/")): diff --git a/cloudinit/sources/DataSourceMAAS.py b/cloudinit/sources/DataSourceMAAS.py index b55d8a21..612d8ffa 100644 --- a/cloudinit/sources/DataSourceMAAS.py +++ b/cloudinit/sources/DataSourceMAAS.py @@ -50,7 +50,8 @@ class DataSourceMAAS(sources.DataSource): self.oauth_clockskew = None def __str__(self): - return "%s [%s]" % (util.obj_name(self), self.base_url) + root = sources.DataSource.__str__(self) + return "%s [%s]" % (root, self.base_url) def get_data(self): mcfg = self.ds_cfg diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py index bed500a2..9a770d38 100644 --- a/cloudinit/sources/DataSourceNoCloud.py +++ 
b/cloudinit/sources/DataSourceNoCloud.py @@ -40,9 +40,8 @@ class DataSourceNoCloud(sources.DataSource): self.supported_seed_starts = ("/", "file://") def __str__(self): - mstr = "%s [seed=%s][dsmode=%s]" % (util.obj_name(self), - self.seed, self.dsmode) - return mstr + root = sources.DataSource.__str__(self) + return "%s [seed=%s][dsmode=%s]" % (root, self.seed, self.dsmode) def get_data(self): defaults = { diff --git a/cloudinit/sources/DataSourceNone.py b/cloudinit/sources/DataSourceNone.py index c2125bee..e2175e1f 100644 --- a/cloudinit/sources/DataSourceNone.py +++ b/cloudinit/sources/DataSourceNone.py @@ -41,9 +41,6 @@ class DataSourceNone(sources.DataSource): def get_instance_id(self): return 'iid-datasource-none' - def __str__(self): - return util.obj_name(self) - @property def is_disconnected(self): return True diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py index e90150c6..ae139074 100644 --- a/cloudinit/sources/DataSourceOVF.py +++ b/cloudinit/sources/DataSourceOVF.py @@ -43,7 +43,8 @@ class DataSourceOVF(sources.DataSource): self.supported_seed_starts = ("/", "file://") def __str__(self): - return "%s [seed=%s]" % (util.obj_name(self), self.seed) + root = sources.DataSource.__str__(self) + return "%s [seed=%s]" % (root, self.seed) def get_data(self): found = [] diff --git a/cloudinit/sources/__init__.py b/cloudinit/sources/__init__.py index 96baff90..d8fbacdd 100644 --- a/cloudinit/sources/__init__.py +++ b/cloudinit/sources/__init__.py @@ -25,6 +25,7 @@ import os from cloudinit import importer from cloudinit import log as logging +from cloudinit import type_utils from cloudinit import user_data as ud from cloudinit import util @@ -52,7 +53,7 @@ class DataSource(object): self.userdata = None self.metadata = None self.userdata_raw = None - name = util.obj_name(self) + name = type_utils.obj_name(self) if name.startswith(DS_PREFIX): name = name[len(DS_PREFIX):] self.ds_cfg = util.get_cfg_by_path(self.sys_cfg, @@ -62,6 +63,9 @@ class DataSource(object): else: self.ud_proc = ud_proc + def __str__(self): + return type_utils.obj_name(self) + def get_userdata(self, apply_filter=False): if self.userdata is None: self.userdata = self.ud_proc.process(self.get_userdata_raw()) @@ -214,7 +218,7 @@ def normalize_pubkey_data(pubkey_data): def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list): ds_list = list_sources(cfg_list, ds_deps, pkg_list) - ds_names = [util.obj_name(f) for f in ds_list] + ds_names = [type_utils.obj_name(f) for f in ds_list] LOG.debug("Searching for data source in: %s", ds_names) for cls in ds_list: @@ -222,7 +226,7 @@ def find_source(sys_cfg, distro, paths, ds_deps, cfg_list, pkg_list): LOG.debug("Seeing if we can get any data from %s", cls) s = cls(sys_cfg, distro, paths) if s.get_data(): - return (s, util.obj_name(cls)) + return (s, type_utils.obj_name(cls)) except Exception: util.logexc(LOG, "Getting data from %s failed", cls) diff --git a/cloudinit/stages.py b/cloudinit/stages.py index 94a267df..531e7997 100644 --- a/cloudinit/stages.py +++ b/cloudinit/stages.py @@ -43,6 +43,7 @@ from cloudinit import helpers from cloudinit import importer from cloudinit import log as logging from cloudinit import sources +from cloudinit import type_utils from cloudinit import util LOG = logging.getLogger(__name__) @@ -220,7 +221,7 @@ class Init(object): # Any config provided??? 
pkg_list = self.cfg.get('datasource_pkg_list') or [] # Add the defaults at the end - for n in ['', util.obj_name(sources)]: + for n in ['', type_utils.obj_name(sources)]: if n not in pkg_list: pkg_list.append(n) cfg_list = self.cfg.get('datasource_list') or [] @@ -280,7 +281,7 @@ class Init(object): dp = self.paths.get_cpath('data') # Write what the datasource was and is.. - ds = "%s: %s" % (util.obj_name(self.datasource), self.datasource) + ds = "%s: %s" % (type_utils.obj_name(self.datasource), self.datasource) previous_ds = None ds_fn = os.path.join(idir, 'datasource') try: @@ -497,7 +498,7 @@ class Modules(object): else: raise TypeError(("Failed to read '%s' item in config," " unknown type %s") % - (item, util.obj_name(item))) + (item, type_utils.obj_name(item))) return module_list def _fixup_modules(self, raw_mods): @@ -515,7 +516,7 @@ class Modules(object): # Reset it so when ran it will get set to a known value freq = None mod_locs = importer.find_module(mod_name, - ['', util.obj_name(config)], + ['', type_utils.obj_name(config)], ['handle']) if not mod_locs: LOG.warn("Could not find module named %s", mod_name) diff --git a/cloudinit/type_utils.py b/cloudinit/type_utils.py new file mode 100644 index 00000000..2decbfc5 --- /dev/null +++ b/cloudinit/type_utils.py @@ -0,0 +1,34 @@ +# vi: ts=4 expandtab +# +# Copyright (C) 2012 Canonical Ltd. +# Copyright (C) 2012 Hewlett-Packard Development Company, L.P. +# Copyright (C) 2012 Yahoo! Inc. +# +# Author: Scott Moser +# Author: Juerg Haefliger +# Author: Joshua Harlow +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 3, as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+# +# pylint: disable=C0302 + +import types + + +def obj_name(obj): + if isinstance(obj, (types.TypeType, + types.ModuleType, + types.FunctionType, + types.LambdaType)): + return str(obj.__name__) + return obj_name(obj.__class__) diff --git a/cloudinit/util.py b/cloudinit/util.py index ab918433..73bf6304 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -43,14 +43,15 @@ import subprocess import sys import tempfile import time -import types import urlparse import yaml from cloudinit import importer from cloudinit import log as logging +from cloudinit import mergers from cloudinit import safeyaml +from cloudinit import type_utils from cloudinit import url_helper as uhelp from cloudinit import version @@ -194,11 +195,12 @@ def fork_cb(child_cb, *args): os._exit(0) # pylint: disable=W0212 except: logexc(LOG, ("Failed forking and" - " calling callback %s"), obj_name(child_cb)) + " calling callback %s"), + type_utils.obj_name(child_cb)) os._exit(1) # pylint: disable=W0212 else: LOG.debug("Forked child %s who will run callback %s", - fid, obj_name(child_cb)) + fid, type_utils.obj_name(child_cb)) def is_true(val, addons=None): @@ -513,15 +515,6 @@ def make_url(scheme, host, port=None, return urlparse.urlunparse(pieces) -def obj_name(obj): - if isinstance(obj, (types.TypeType, - types.ModuleType, - types.FunctionType, - types.LambdaType)): - return str(obj.__name__) - return obj_name(obj.__class__) - - def mergemanydict(srcs, reverse=False): if reverse: srcs = reversed(srcs) @@ -538,13 +531,9 @@ def mergedict(src, cand): If C{src} has a key C{cand} will not override. Nested dictionaries are merged recursively. """ - if isinstance(src, dict) and isinstance(cand, dict): - for (k, v) in cand.iteritems(): - if k not in src: - src[k] = v - else: - src[k] = mergedict(src[k], v) - return src + raw_mergers = mergers.default_mergers() + merger = mergers.construct(raw_mergers) + return merger.merge(src, cand) @contextlib.contextmanager @@ -645,7 +634,7 @@ def load_yaml(blob, default=None, allowed=(dict,)): # Yes this will just be caught, but thats ok for now... 
raise TypeError(("Yaml load allows %s root types," " but got %s instead") % - (allowed, obj_name(converted))) + (allowed, type_utils.obj_name(converted))) loaded = converted except (yaml.YAMLError, TypeError, ValueError): if len(blob) == 0: @@ -714,7 +703,7 @@ def read_conf_with_confd(cfgfile): if not isinstance(confd, (str, basestring)): raise TypeError(("Config file %s contains 'conf_d' " "with non-string type %s") % - (cfgfile, obj_name(confd))) + (cfgfile, type_utils.obj_name(confd))) else: confd = str(confd).strip() elif os.path.isdir("%s.d" % cfgfile): @@ -1472,7 +1461,7 @@ def shellify(cmdlist, add_header=True): else: raise RuntimeError(("Unable to shellify type %s" " which is not a list or string") - % (obj_name(args))) + % (type_utils.obj_name(args))) LOG.debug("Shellified %s commands.", cmds_made) return content diff --git a/tests/unittests/test_userdata.py b/tests/unittests/test_userdata.py index 9e1fed7e..ef0dd7b8 100644 --- a/tests/unittests/test_userdata.py +++ b/tests/unittests/test_userdata.py @@ -74,7 +74,7 @@ run: - morestuff ''' message2 = MIMEBase("text", "cloud-config") - message2['Merge-Type'] = 'dict()+list(extend)+str()' + message2['X-Merge-Type'] = 'dict()+list(extend)+str()' message2.set_payload(blob2) blob3 = ''' @@ -83,6 +83,7 @@ e: - 1 - 2 - 3 +p: 1 ''' message3 = MIMEBase("text", "cloud-config") message3['Merge-Type'] = 'dict()+list()+str()' @@ -109,6 +110,7 @@ e: self.assertEquals(contents['run'], ['b', 'c', 'stuff', 'morestuff']) self.assertEquals(contents['a'], 'be') self.assertEquals(contents['e'], 'fg') + self.assertEquals(contents['p'], 1) def test_unhandled_type_warning(self): """Raw text without magic is ignored but shows warning.""" From 73fc7534b77f37baa9c9e6d73812e27ab1c69958 Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Wed, 6 Mar 2013 19:36:31 -0800 Subject: [PATCH 12/16] Make conf.d and the default merging use the new merging algos. --- cloudinit/sources/DataSourceConfigDrive.py | 2 +- cloudinit/sources/DataSourceNoCloud.py | 8 +++---- cloudinit/sources/DataSourceOVF.py | 4 ++-- cloudinit/util.py | 25 +++++++++++----------- 4 files changed, 19 insertions(+), 20 deletions(-) diff --git a/cloudinit/sources/DataSourceConfigDrive.py b/cloudinit/sources/DataSourceConfigDrive.py index 46abd772..0216ed07 100644 --- a/cloudinit/sources/DataSourceConfigDrive.py +++ b/cloudinit/sources/DataSourceConfigDrive.py @@ -154,7 +154,7 @@ class DataSourceConfigDrive(sources.DataSource): return False md = results['metadata'] - md = util.mergedict(md, DEFAULT_METADATA) + md = util.mergemanydict([md, DEFAULT_METADATA]) # Perform some metadata 'fixups' # diff --git a/cloudinit/sources/DataSourceNoCloud.py b/cloudinit/sources/DataSourceNoCloud.py index 9a770d38..7800812b 100644 --- a/cloudinit/sources/DataSourceNoCloud.py +++ b/cloudinit/sources/DataSourceNoCloud.py @@ -64,7 +64,7 @@ class DataSourceNoCloud(sources.DataSource): # Check to see if the seed dir has data. seedret = {} if util.read_optional_seed(seedret, base=self.seed_dir + "/"): - md = util.mergedict(md, seedret['meta-data']) + md = util.mergemanydict([md, seedret['meta-data']]) ud = seedret['user-data'] found.append(self.seed_dir) LOG.debug("Using seeded cache data from %s", self.seed_dir) @@ -88,7 +88,7 @@ class DataSourceNoCloud(sources.DataSource): LOG.debug("Attempting to use data from %s", dev) (newmd, newud) = util.mount_cb(dev, util.read_seeded) - md = util.mergedict(newmd, md) + md = util.mergemanydict([newmd, md]) ud = newud # For seed from a device, the default mode is 'net'. 
@@ -139,11 +139,11 @@ class DataSourceNoCloud(sources.DataSource): LOG.debug("Using seeded cache data from %s", seedfrom) # Values in the command line override those from the seed - md = util.mergedict(md, md_seed) + md = util.mergemanydict([md, md_seed]) found.append(seedfrom) # Now that we have exhausted any other places merge in the defaults - md = util.mergedict(md, defaults) + md = util.mergemanydict([md, defaults]) # Update the network-interfaces if metadata had 'network-interfaces' # entry and this is the local datasource, or 'seedfrom' was used diff --git a/cloudinit/sources/DataSourceOVF.py b/cloudinit/sources/DataSourceOVF.py index ae139074..0530c4b7 100644 --- a/cloudinit/sources/DataSourceOVF.py +++ b/cloudinit/sources/DataSourceOVF.py @@ -94,11 +94,11 @@ class DataSourceOVF(sources.DataSource): (md_seed, ud) = util.read_seeded(seedfrom, timeout=None) LOG.debug("Using seeded cache data from %s", seedfrom) - md = util.mergedict(md, md_seed) + md = util.mergemanydict([md, md_seed]) found.append(seedfrom) # Now that we have exhausted any other places merge in the defaults - md = util.mergedict(md, defaults) + md = util.mergemanydict([md, defaults]) self.seed = ",".join(found) self.metadata = md diff --git a/cloudinit/util.py b/cloudinit/util.py index 73bf6304..e5c6f4ea 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -519,23 +519,22 @@ def mergemanydict(srcs, reverse=False): if reverse: srcs = reversed(srcs) m_cfg = {} + merge_how = [mergers.default_mergers()] for a_cfg in srcs: if a_cfg: - m_cfg = mergedict(m_cfg, a_cfg) + # Take the last merger as the one that + # will define how to merge next... + mergers_to_apply = list(merge_how[-1]) + merger = mergers.construct(mergers_to_apply) + m_cfg = merger.merge(m_cfg, a_cfg) + # If the config has now has new merger set, + # extract them to be used next time... + new_mergers = mergers.dict_extract_mergers(m_cfg) + if new_mergers: + merge_how.append(new_mergers) return m_cfg -def mergedict(src, cand): - """ - Merge values from C{cand} into C{src}. - If C{src} has a key C{cand} will not override. - Nested dictionaries are merged recursively. - """ - raw_mergers = mergers.default_mergers() - merger = mergers.construct(raw_mergers) - return merger.merge(src, cand) - - @contextlib.contextmanager def chdir(ndir): curr = os.getcwd() @@ -714,7 +713,7 @@ def read_conf_with_confd(cfgfile): # Conf.d settings override input configuration confd_cfg = read_conf_d(confd) - return mergedict(confd_cfg, cfg) + return mergemanydict([confd_cfg, cfg]) def read_cc_from_cmdline(cmdline=None): From a72b7b99cd2a202f5cf34abcd6b3908e5920bf2c Mon Sep 17 00:00:00 2001 From: Joshua Harlow Date: Wed, 6 Mar 2013 22:24:29 -0800 Subject: [PATCH 13/16] Add some nice docs on what this is. 
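As a rough usage sketch of the behaviour documented below (illustrative only, not part of the patch itself; it assumes the cloudinit tree from this series is importable and that a source dict carries its own 'merge_how' specification):

from cloudinit import util

base = {
    # A config may now carry its own merge specification; the keys
    # consulted for it are 'merge_how' and then 'merge_type'.
    'merge_how': 'list(extend)+dict()+str()',
    'run': ['a'],
}
overlay = {
    'run': ['b'],
}

# The mergers extracted from 'base' govern how the *next* source is merged
# in, so 'run' is extended rather than overwritten by the overlay.
merged = util.mergemanydict([base, overlay])
# merged['run'] == ['a', 'b']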
--- cloudinit/mergers/__init__.py | 2 +- doc/merging.txt | 179 ++++++++++++++++++++++++++++++++++ 2 files changed, 180 insertions(+), 1 deletion(-) create mode 100644 doc/merging.txt diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index 453426af..45e88fb3 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -85,7 +85,7 @@ class LookupMerger(UnknownMerger): def dict_extract_mergers(config): parsed_mergers = [] - raw_mergers = config.get('merger_how') + raw_mergers = config.get('merge_how') if raw_mergers is None: raw_mergers = config.get('merge_type') if raw_mergers is None: diff --git a/doc/merging.txt b/doc/merging.txt new file mode 100644 index 00000000..f719aec8 --- /dev/null +++ b/doc/merging.txt @@ -0,0 +1,179 @@ +Arriving in 0.7.2 is a new way to handle dictionary merging in cloud-init. +--- + +Overview +-------- + +This was done because it has been a common feature request that there be a +way to specify how cloud-config yaml "dictionaries" are merged together when +there are multiple yamls to merge together (say when performing an #include). + +Since previously the merging algorithm was very simple and would only overwrite +and not append to lists or strings, it was decided to create a new and +improved way to merge dictionaries (and their contained objects) together in a +way that is customizable, thus allowing for users who provide cloud-config data +to determine exactly how their objects will be merged. + +For example: + +#cloud-config (1) +run_cmd: + - bash1 + - bash2 + +#cloud-config (2) +run_cmd: + - bash3 + - bash4 + +The previous way of merging the above 2 objects would result in a final +cloud-config object that contains the following. + +#cloud-config (merged) +run_cmd: + - bash3 + - bash4 + +Typically this is not what users want; instead they would likely prefer: + +#cloud-config (merged) +run_cmd: + - bash1 + - bash2 + - bash3 + - bash4 + +This way makes it easier to combine the various cloud-config objects you have +into a more useful list, thus reducing duplication that would have had to +occur in the previous method to accomplish the same result. + +Customizability +--------------- + +Since the above merging algorithm may not always be the desired merging +algorithm (like how the merging algorithm in < 0.7.2 was not always the preferred +one) the ability to customize how merging is done was introduced through +a new concept called 'merge classes'. + +A merge class is a class definition which provides functions that can be used +to merge a given type with another given type. + +An example of one of these merging classes is the following: + +class Merger(object): + def __init__(self, merger, opts): + self._merger = merger + self._overwrite = 'overwrite' in opts + + # This merging algorithm will attempt to merge with + # another dictionary, on encountering any other type of object + # it will not merge with said object, but will instead return + # the original value + # + # On encountering a dictionary, it will create a new dictionary + # composed of the original and the one to merge with, if 'overwrite' + # is enabled then keys that exist in the original will be overwritten + # by keys in the one to merge with (and associated values). Otherwise + # if not in overwrite mode the 2 conflicting keys themselves will + # be merged.
+ def _on_dict(self, value, merge_with): + if not isinstance(merge_with, (dict)): + return value + merged = dict(value) + for (k, v) in merge_with.items(): + if k in merged: + if not self._overwrite: + merged[k] = self._merger.merge(merged[k], v) + else: + merged[k] = v + else: + merged[k] = v + return merged + +As you can see, there is an '_on_dict' method here that will be given a source value +and a value to merge with. The result will be the merged object. This code itself +is called by another merging class which 'directs' the merging to happen by +analyzing the types of the objects to merge and attempting to find a known object +that will merge that type. I will avoid pasting that here, but it can be found +in the mergers/__init__.py file (see LookupMerger and UnknownMerger). + +So following the typical cloud-init way of allowing source code to be downloaded +and used dynamically, it is possible for users to inject their own merging files +to handle specific types of merging as they choose (the basic ones included will +handle lists, dicts, and strings). Note how each merge can have options associated +with it which affect how the merging is performed, for example a dictionary merger +can be told to overwrite instead of attempting to merge, or a string merger can be +told to append strings instead of discarding other strings to merge with. + +How to activate +--------------- + +There are a few ways to activate the merging algorithms, and to customize them +for your own usage. + +1. The first way involves the usage of MIME messages in cloud-init to specify + multipart documents (this is one way in which multiple cloud-configs are joined + together into a single cloud-config). Two new headers are looked for, both + of which can define the way merging is done (the first header to exist wins). + These new headers (in lookup order) are 'Merge-Type' and 'X-Merge-Type'. The value + should be a string which will satisfy the new merging format definition (see + below for this format). +2. The second way is actually specifying the merge-type in the body of the + cloud-config dictionary. There are 2 ways to specify this, either as a string + or as a dictionary (see format below). The keys that are looked up for this + definition are the following (in order): 'merge_how', 'merge_type'. + +*String format* + +The string format that is expected is the following: + +"classname(option1,option2)+classname2(option3,option4)" (and so on) + +The class name there will be connected to the class names used when looking for the +class that can be used to merge, and the options provided will be given to the class +on construction of that class. + +For example, the default string that is used when none is provided is the following: + +"list(extend)+dict()+str(append)" + +*Dictionary format* + +In cases where a dictionary can be used to specify the same information as the +string format (i.e. option #2 above) it can be used, for example: + +merge_how: + - name: list + settings: [extend] + - name: dict + settings: [] + - name: str + settings: [append] + +This would be the equivalent of the default string format but in dictionary +form instead of string form. + +Specifying multiple types and its effect +---------------------------------------- + +Now you may be asking yourself, if I specify a merge-type header or dictionary +for every cloud-config that I provide, what exactly happens?
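(As a quick aside before the answer: a small illustrative sketch, assuming the cloudinit.mergers module from this series, of how the two formats described above are parsed and applied.)

from cloudinit import mergers

# String form: one classname(options) group per '+' separated entry.
chain = mergers.string_extract_mergers("list(extend)+dict()+str(append)")
# len(chain) == 3, one (name, options) entry per group.

merger = mergers.construct(chain)
merged = merger.merge({'a': 'foo', 'b': [1]}, {'a': 'bar', 'b': [2]})
# With these options 'a' becomes 'foobar' and 'b' becomes [1, 2].

# The dictionary form embedded in a cloud-config should yield an
# equivalent chain when extracted from the config itself.
cfg = {
    'merge_how': [
        {'name': 'list', 'settings': ['extend']},
        {'name': 'dict', 'settings': []},
        {'name': 'str', 'settings': ['append']},
    ],
}
chain_from_cfg = mergers.dict_extract_mergers(cfg)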
+ +The answer is that when merging, a stack of 'merging classes' is kept, the +first one on that stack is the default merging classes, this set of mergers +will be used when the first cloud-config is merged with the initial empty +cloud-config dictionary. If the cloud-config that was just merged provided a +set of merging classes (via the above formats) then those merging classes will +be pushed onto the stack. Now if there is a second cloud-config to be merged then +the merging classes from the cloud-config before the first will be used (not the +default) and so on. This way a cloud-config can decide how it will merge with a +cloud-config dictionary coming after it. + +Other uses +---------- + +The default merging algorithm for merging conf.d yaml files (which form a initial +yaml config for cloud-init) was also changed to use this mechanism so its full +benefits (and customization) can also be used there as well. Other places that +used the previous merging are also similar now extensible (metadata merging for +example). From d01d4935724b78d41b08fc58f0afbce29c1c3dc6 Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 7 Mar 2013 16:28:09 -0500 Subject: [PATCH 14/16] change default merge type the default merge type here was appending to strings and extending lists. Instead we want the same default that cloud-init had previously, which was to overwrite lists and strings. --- cloudinit/mergers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index 45e88fb3..3b56686f 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -25,7 +25,7 @@ from cloudinit import type_utils NAME_MTCH = re.compile(r"(^[a-zA-Z_][A-Za-z0-9_]*)\((.*?)\)$") LOG = logging.getLogger(__name__) -DEF_MERGE_TYPE = "list(extend)+dict()+str(append)" +DEF_MERGE_TYPE = "list()+dict()+str()" class UnknownMerger(object): From 149b2480ddbc5da1adf255b219893324927e740c Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 7 Mar 2013 16:47:54 -0500 Subject: [PATCH 15/16] fix pep8 and pylint --- cloudinit/config/cc_landscape.py | 3 ++- cloudinit/distros/__init__.py | 4 +-- cloudinit/mergers/__init__.py | 5 ++-- cloudinit/mergers/str.py | 2 +- cloudinit/sources/DataSourceNone.py | 1 - tests/unittests/test__init__.py | 2 -- tests/unittests/test_merging.py | 8 +++--- tests/unittests/test_userdata.py | 39 ++++++++++++++++------------- 8 files changed, 34 insertions(+), 30 deletions(-) diff --git a/cloudinit/config/cc_landscape.py b/cloudinit/config/cc_landscape.py index 47c10a97..8a709677 100644 --- a/cloudinit/config/cc_landscape.py +++ b/cloudinit/config/cc_landscape.py @@ -59,7 +59,8 @@ def handle(_name, cfg, cloud, log, _args): if not isinstance(ls_cloudcfg, (dict)): raise RuntimeError(("'landscape' key existed in config," " but not a dictionary type," - " is a %s instead"), type_utils.obj_name(ls_cloudcfg)) + " is a %s instead"), + type_utils.obj_name(ls_cloudcfg)) if not ls_cloudcfg: return diff --git a/cloudinit/distros/__init__.py b/cloudinit/distros/__init__.py index 7b6276c5..50d52594 100644 --- a/cloudinit/distros/__init__.py +++ b/cloudinit/distros/__init__.py @@ -741,7 +741,7 @@ def normalize_users_groups(cfg, distro): } if not isinstance(old_user, (dict)): LOG.warn(("Format for 'user' key must be a string or " - "dictionary and not %s"), util.obj_name(old_user)) + "dictionary and not %s"), type_utils.obj_name(old_user)) old_user = {} # If no old user format, then assume the distro @@ -767,7 +767,7 @@ def 
normalize_users_groups(cfg, distro): if not isinstance(base_users, (list, dict, str, basestring)): LOG.warn(("Format for 'users' key must be a comma separated string" " or a dictionary or a list and not %s"), - util.obj_name(base_users)) + type_utils.obj_name(base_users)) base_users = [] if old_user: diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index 3b56686f..ac16f143 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -32,7 +32,7 @@ class UnknownMerger(object): # Named differently so auto-method finding # doesn't pick this up if there is ever a type # named "unknown" - def _handle_unknown(self, meth_wanted, value, merge_with): + def _handle_unknown(self, _meth_wanted, value, _merge_with): return value # This merging will attempt to look for a '_on_X' method @@ -119,7 +119,8 @@ def string_extract_mergers(merge_how): continue match = NAME_MTCH.match(m_name) if not match: - msg = "Matcher identifer '%s' is not in the right format" % (m_name) + msg = ("Matcher identifer '%s' is not in the right format" % + (m_name)) raise ValueError(msg) (m_name, m_ops) = match.groups() m_ops = m_ops.strip().split(",") diff --git a/cloudinit/mergers/str.py b/cloudinit/mergers/str.py index f1534c5b..291c91c2 100644 --- a/cloudinit/mergers/str.py +++ b/cloudinit/mergers/str.py @@ -18,7 +18,7 @@ class Merger(object): - def __init__(self, merger, opts): + def __init__(self, _merger, opts): self._append = 'append' in opts # On encountering a unicode object to merge value with diff --git a/cloudinit/sources/DataSourceNone.py b/cloudinit/sources/DataSourceNone.py index e2175e1f..12a8a992 100644 --- a/cloudinit/sources/DataSourceNone.py +++ b/cloudinit/sources/DataSourceNone.py @@ -18,7 +18,6 @@ from cloudinit import log as logging from cloudinit import sources -from cloudinit import util LOG = logging.getLogger(__name__) diff --git a/tests/unittests/test__init__.py b/tests/unittests/test__init__.py index 7924755a..56ccbcfb 100644 --- a/tests/unittests/test__init__.py +++ b/tests/unittests/test__init__.py @@ -24,8 +24,6 @@ class FakeModule(handlers.Handler): def handle_part(self, data, ctype, filename, payload, frequency): pass - - class TestWalkerHandleHandler(MockerTestCase): diff --git a/tests/unittests/test_merging.py b/tests/unittests/test_merging.py index fa7ee8e4..591a99c8 100644 --- a/tests/unittests/test_merging.py +++ b/tests/unittests/test_merging.py @@ -1,5 +1,3 @@ -import os - from tests.unittests import helpers from cloudinit import mergers @@ -107,8 +105,10 @@ class TestSimpleRun(helpers.MockerTestCase): self.assertEquals(merged['a'], [1, 'b', 2, 'e', 'f', 'g']) self.assertEquals(merged['b'], 'blahblahmore') self.assertEquals(merged['c']['f'], 'bigblobofstuff') - self.assertEquals(merged['run'], ['runme', 'runme2', 'morecmd', 'moremoremore']) - self.assertEquals(merged['runmereally'], ['e', ['a'], 'd', 'blah', ['b'], 'e']) + self.assertEquals(merged['run'], ['runme', 'runme2', 'morecmd', + 'moremoremore']) + self.assertEquals(merged['runmereally'], ['e', ['a'], 'd', 'blah', + ['b'], 'e']) def test_dict_overwrite_layered(self): source = { diff --git a/tests/unittests/test_userdata.py b/tests/unittests/test_userdata.py index ef0dd7b8..48ad9c5f 100644 --- a/tests/unittests/test_userdata.py +++ b/tests/unittests/test_userdata.py @@ -7,8 +7,6 @@ import os from email.mime.base import MIMEBase -from mocker import MockerTestCase - from cloudinit import handlers from cloudinit import helpers as c_helpers from cloudinit import log @@ -97,14 +95,16 @@ 
p: 1 new_root = self.makeDir() self.patchUtils(new_root) self.patchOS(new_root) - cloud_cfg.handle_part(None, handlers.CONTENT_START, None, None, None, None) + cloud_cfg.handle_part(None, handlers.CONTENT_START, None, None, None, + None) for i, m in enumerate(messages): headers = dict(m) fn = "part-%s" % (i + 1) payload = m.get_payload(decode=True) cloud_cfg.handle_part(None, headers['Content-Type'], fn, payload, None, headers) - cloud_cfg.handle_part(None, handlers.CONTENT_END, None, None, None, None) + cloud_cfg.handle_part(None, handlers.CONTENT_END, None, None, None, + None) contents = util.load_file(paths.get_ipath('cloud_config')) contents = util.load_yaml(contents) self.assertEquals(contents['run'], ['b', 'c', 'stuff', 'morestuff']) @@ -118,8 +118,9 @@ p: 1 data = "arbitrary text\n" ci.datasource = FakeDataSource(data) - self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) - self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) + mock_write = self.mocker.replace("cloudinit.util.write_file", + passthrough=False) + mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) self.mocker.replay() log_file = self.capture_log(logging.WARNING) @@ -136,8 +137,9 @@ p: 1 message.set_payload("Just text") ci.datasource = FakeDataSource(message.as_string()) - self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) - self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) + mock_write = self.mocker.replace("cloudinit.util.write_file", + passthrough=False) + mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) self.mocker.replay() log_file = self.capture_log(logging.WARNING) @@ -154,9 +156,10 @@ p: 1 ci.datasource = FakeDataSource(script) outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") - self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) - self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) - self.mock_write(outpath, script, 0700) + mock_write = self.mocker.replace("cloudinit.util.write_file", + passthrough=False) + mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) + mock_write(outpath, script, 0700) self.mocker.replay() log_file = self.capture_log(logging.WARNING) @@ -173,9 +176,10 @@ p: 1 ci.datasource = FakeDataSource(message.as_string()) outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") - self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) - self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) - self.mock_write(outpath, script, 0700) + mock_write = self.mocker.replace("cloudinit.util.write_file", + passthrough=False) + mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) + mock_write(outpath, script, 0700) self.mocker.replay() log_file = self.capture_log(logging.WARNING) @@ -192,9 +196,10 @@ p: 1 ci.datasource = FakeDataSource(message.as_string()) outpath = os.path.join(ci.paths.get_ipath_cur("scripts"), "part-001") - self.mock_write = self.mocker.replace("cloudinit.util.write_file", passthrough=False) - self.mock_write(outpath, script, 0700) - self.mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) + mock_write = self.mocker.replace("cloudinit.util.write_file", + passthrough=False) + mock_write(outpath, script, 0700) + mock_write(ci.paths.get_ipath("cloud_config"), "", 0600) self.mocker.replay() log_file = self.capture_log(logging.WARNING) From 626d07a00eb9c7cb4ca8c99c65f042ca5ac8cb8a Mon Sep 17 00:00:00 2001 From: Scott Moser Date: Thu, 7 Mar 2013 17:13:05 -0500 Subject: 
[PATCH 16/16] more pep8/pylint. all clean now --- cloudinit/handlers/boot_hook.py | 3 ++- cloudinit/handlers/cloud_config.py | 7 ++++--- cloudinit/handlers/shell_script.py | 3 ++- cloudinit/handlers/upstart_job.py | 3 ++- cloudinit/mergers/__init__.py | 2 -- tests/unittests/test__init__.py | 3 ++- tests/unittests/test_merging.py | 1 - tests/unittests/test_userdata.py | 6 +++--- 8 files changed, 15 insertions(+), 13 deletions(-) diff --git a/cloudinit/handlers/boot_hook.py b/cloudinit/handlers/boot_hook.py index bf313f10..bf2899ab 100644 --- a/cloudinit/handlers/boot_hook.py +++ b/cloudinit/handlers/boot_hook.py @@ -56,7 +56,8 @@ class BootHookPartHandler(handlers.Handler): util.write_file(filepath, contents, 0700) return filepath - def handle_part(self, _data, ctype, filename, payload, _frequency): + def handle_part(self, _data, ctype, filename, # pylint: disable=W0221 + payload, frequency): # pylint: disable=W0613 if ctype in handlers.CONTENT_SIGNALS: return diff --git a/cloudinit/handlers/cloud_config.py b/cloudinit/handlers/cloud_config.py index 5f519f78..d30d6338 100644 --- a/cloudinit/handlers/cloud_config.py +++ b/cloudinit/handlers/cloud_config.py @@ -46,7 +46,7 @@ class CloudConfigPartHandler(handlers.Handler): handlers.type_from_starts_with("#cloud-config"), ] - def _write_cloud_config(self, buf): + def _write_cloud_config(self): if not self.cloud_fn: return # Capture which files we merged from... @@ -107,12 +107,13 @@ class CloudConfigPartHandler(handlers.Handler): self.cloud_buf = None self.mergers = [DEF_MERGERS] - def handle_part(self, _data, ctype, filename, payload, _freq, headers): + def handle_part(self, _data, ctype, filename, # pylint: disable=W0221 + payload, _frequency, headers): # pylint: disable=W0613 if ctype == handlers.CONTENT_START: self._reset() return if ctype == handlers.CONTENT_END: - self._write_cloud_config(self.cloud_buf) + self._write_cloud_config() self._reset() return try: diff --git a/cloudinit/handlers/shell_script.py b/cloudinit/handlers/shell_script.py index 2a87e8dd..b185c374 100644 --- a/cloudinit/handlers/shell_script.py +++ b/cloudinit/handlers/shell_script.py @@ -41,7 +41,8 @@ class ShellScriptPartHandler(handlers.Handler): handlers.type_from_starts_with("#!"), ] - def handle_part(self, _data, ctype, filename, payload, _frequency): + def handle_part(self, _data, ctype, filename, # pylint: disable=W0221 + payload, frequency): # pylint: disable=W0613 if ctype in handlers.CONTENT_SIGNALS: # TODO(harlowja): maybe delete existing things here return diff --git a/cloudinit/handlers/upstart_job.py b/cloudinit/handlers/upstart_job.py index 3d8833a1..edd56527 100644 --- a/cloudinit/handlers/upstart_job.py +++ b/cloudinit/handlers/upstart_job.py @@ -42,7 +42,8 @@ class UpstartJobPartHandler(handlers.Handler): handlers.type_from_starts_with("#upstart-job"), ] - def handle_part(self, _data, ctype, filename, payload, frequency): + def handle_part(self, _data, ctype, filename, # pylint: disable=W0221 + payload, frequency): if ctype in handlers.CONTENT_SIGNALS: return diff --git a/cloudinit/mergers/__init__.py b/cloudinit/mergers/__init__.py index ac16f143..e1ff57ba 100644 --- a/cloudinit/mergers/__init__.py +++ b/cloudinit/mergers/__init__.py @@ -152,5 +152,3 @@ def construct(parsed_mergers): for (attr, opts) in mergers_to_be: mergers.append(attr(root, opts)) return root - - diff --git a/tests/unittests/test__init__.py b/tests/unittests/test__init__.py index 56ccbcfb..2c0abfbc 100644 --- a/tests/unittests/test__init__.py +++ b/tests/unittests/test__init__.py 
@@ -22,7 +22,8 @@ class FakeModule(handlers.Handler): def list_types(self): return self.types - def handle_part(self, data, ctype, filename, payload, frequency): + def handle_part(self, _data, ctype, filename, # pylint: disable=W0221 + payload, frequency): pass diff --git a/tests/unittests/test_merging.py b/tests/unittests/test_merging.py index 591a99c8..ad137e85 100644 --- a/tests/unittests/test_merging.py +++ b/tests/unittests/test_merging.py @@ -140,4 +140,3 @@ class TestSimpleRun(helpers.MockerTestCase): 'e': 'f', } }) - diff --git a/tests/unittests/test_userdata.py b/tests/unittests/test_userdata.py index 48ad9c5f..fdfe2542 100644 --- a/tests/unittests/test_userdata.py +++ b/tests/unittests/test_userdata.py @@ -55,7 +55,7 @@ class TestConsumeUserData(helpers.FilesystemMockingTestCase): #cloud-config a: b e: f -run: +run: - b - c ''' @@ -67,7 +67,7 @@ run: #cloud-config a: e e: g -run: +run: - stuff - morestuff ''' @@ -77,7 +77,7 @@ run: blob3 = ''' #cloud-config -e: +e: - 1 - 2 - 3
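As a closing illustration of what this series enables (not part of the patches themselves; it assumes only the standard library email package and the behaviour described in doc/merging.txt), this is roughly how a user would request list extension across two cloud-config parts:

from email.mime.base import MIMEBase
from email.mime.multipart import MIMEMultipart

part1 = MIMEBase("text", "cloud-config")
# This header controls how the *next* part is merged into the accumulated config.
part1['Merge-Type'] = 'list(extend)+dict()+str()'
part1.set_payload('#cloud-config\nrun_cmd:\n - bash1\n')

part2 = MIMEBase("text", "cloud-config")
# 'X-Merge-Type' is also looked up; 'Merge-Type' wins if both are present.
part2['X-Merge-Type'] = 'list(extend)+dict()+str()'
part2.set_payload('#cloud-config\nrun_cmd:\n - bash2\n')

combined = MIMEMultipart()
combined.attach(part1)
combined.attach(part2)
user_data = combined.as_string()
# Fed in as user-data, the written out cloud_config should end up with
# run_cmd: [bash1, bash2] rather than the old behaviour of just [bash2].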