
Add a set of tools that form an API for communicating with the bareon instance. This API uses Ironic's vendor_passthru mechanism to "catch" requests sent from the bareon instance to the Ironic API, so the bareon-ironic driver can receive bareon instance requests. This communication channel already exists: previously it was used only to receive the notification from the bareon instance about a successful node boot. It is now extended so that the bareon-ironic driver can send "generic" tasks (steps) to the bareon instance. Currently only one step is implemented: injecting an SSH key into the bareon instance. This new "steps" interface makes it possible to drop the preinstalled SSH key from the bareon instance right away, and in the future it should allow dropping SSH communication between bareon-ironic and the bareon instance entirely.

Change-Id: I0791807c7cb3dba70c71c4f46e5eddf01da76cdd
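The exchange over this channel looks roughly as follows (an illustrative sketch based on the handlers in this patch, not a formal API description; the URL form assumes Ironic's usual vendor_passthru routing):

    GET  /v1/nodes/<uuid>/vendor_passthru?method=deploy_steps
         -> next step for the agent, e.g.
         {'name': 'inject-ssh-keys',
          'payload': {'ssh-keys': {'root': ['<contents of bareon_key.pub>']}}}

    POST /v1/nodes/<uuid>/vendor_passthru?method=deploy_steps
         <- step result from the agent, e.g.
         {'name': 'inject-ssh-keys', 'status': True,
          'status-details': '', 'payload': {}}
         answered by the conductor with {'url': None}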
#
# Copyright 2017 Cray Inc., All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
Bareon deploy driver.
"""

import abc
import inspect
import json
import os
import pprint
import stat
import sys

import eventlet
import pkg_resources
import stevedore
import six
from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_log import log
from oslo_service import loopingcall

from ironic.common import boot_devices
from ironic.common import exception
from ironic.common import states
from ironic.common.i18n import _
from ironic.common.i18n import _LI
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers import base
from ironic.drivers.modules import deploy_utils
from ironic.objects import node as db_node

from bareon_ironic.modules import bareon_exception
from bareon_ironic.modules import bareon_utils
from bareon_ironic.modules.resources import actions
from bareon_ironic.modules.resources import image_service
from bareon_ironic.modules.resources import resources

agent_opts = [
    cfg.StrOpt('bareon_pxe_append_params',
               default='nofb nomodeset vga=normal',
               help='Additional append parameters for baremetal PXE boot.'),
    cfg.StrOpt('deploy_kernel',
               help='UUID (from Glance) of the default deployment kernel.'),
    cfg.StrOpt('deploy_ramdisk',
               help='UUID (from Glance) of the default deployment ramdisk.'),
    cfg.StrOpt('deploy_config_priority',
               default='instance:node:image:conf',
               help='Priority for deploy config'),
    cfg.StrOpt('deploy_config',
               help='A uuid or name of glance image representing '
                    'deploy config.'),
    cfg.IntOpt('deploy_timeout',
               default=15,
               help="Timeout in minutes for the node continue-deploy process "
                    "(deployment phase following the callback)."),
    cfg.IntOpt('check_terminate_interval',
               help='Time interval in seconds to check whether the deployment '
                    'driver has responded to the termination signal',
               default=5),
    cfg.IntOpt('check_terminate_max_retries',
               help='Max retries to check whether the node is already '
                    'terminated',
               default=20),
    cfg.StrOpt('agent_data_driver',
               default='ironic',
               help='Bareon data driver'),
]
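# These options live in the [bareon] group of the Ironic configuration (see
# the register_opts() call below). A minimal illustrative snippet -- the
# values here are placeholders, not defaults shipped with the driver:
#
#     [bareon]
#     deploy_kernel = <glance-uuid-of-deploy-kernel>
#     deploy_ramdisk = <glance-uuid-of-deploy-ramdisk>
#     deploy_config_priority = instance:node:image:conf
#     deploy_timeout = 15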

CONF = cfg.CONF
CONF.register_opts(agent_opts, group='bareon')

LOG = log.getLogger(__name__)

REQUIRED_PROPERTIES = {}
OTHER_PROPERTIES = {
    'deploy_kernel': _('UUID (from Glance) of the deployment kernel.'),
    'deploy_ramdisk': _('UUID (from Glance) of the deployment ramdisk.'),
    'bareon_username': _('SSH username; default is "root". Optional.'),
    'bareon_key_filename': _('Name of SSH private key file; default is '
                             '"/etc/ironic/bareon_key". Optional.'),
    'bareon_ssh_port': _('SSH port; default is 22. Optional.'),
    'bareon_deploy_script': _('Path to the bareon executable entry point; '
                              'default is "bareon-provision". Optional.'),
    'deploy_config': _('Deploy config Glance image id/name'),
}
COMMON_PROPERTIES = OTHER_PROPERTIES

REQUIRED_BAREON_VERSION = "0.0."

TERMINATE_FLAG = 'terminate_deployment'


def _clean_up_images(task):
    node = task.node
    if node.instance_info.get('images_cleaned_up', False):
        return
    try:
        with open(get_tenant_images_json_path(node)) as f:
            images_json = json.loads(f.read())
    except Exception as ex:
        LOG.warning("Cannot find tenant_images.json for the %s node to "
                    "finish cleanup." % node)
        LOG.warning(str(ex))
    else:
        images = resources.ResourceList.from_dict(images_json, task)
        images.cleanup_resources()
        bareon_utils.change_node_dict(task.node, 'instance_info',
                                      {'images_cleaned_up': True})


class BareonDeploy(base.DeployInterface):
    """Interface for deploy-related actions."""

    def __init__(self):
        super(BareonDeploy, self).__init__()
        self._deployment_config_validators = {}

    def get_properties(self):
        """Return the properties of the interface.

        :returns: dictionary of <property name>:<property description> entries.
        """
        return COMMON_PROPERTIES

    def validate(self, task):
        """Validate the driver-specific Node deployment info.

        This method validates whether the properties of the supplied node
        contain the required information for this driver to deploy images to
        the node.

        :param task: a TaskManager instance
        :raises: MissingParameterValue
        """
        _NodeDriverInfoAdapter(task.node)
        self._validate_deployment_config(task)

    @task_manager.require_exclusive_lock
    def deploy(self, task):
        """Perform a deployment to a node.

        Perform the necessary work to deploy an image onto the specified node.
        This method will be called after prepare(), which may have already
        performed any preparatory steps, such as pre-caching some data for the
        node.

        :param task: a TaskManager instance.
        :returns: status of the deploy. One of ironic.common.states.
        """
        manager_utils.node_power_action(task, states.REBOOT)
        return states.DEPLOYWAIT

    @task_manager.require_exclusive_lock
    def tear_down(self, task):
        """Tear down a previous deployment on the task's node.

        :param task: a TaskManager instance.
        :returns: status of the deploy. One of ironic.common.states.
        """
        manager_utils.node_power_action(task, states.POWER_OFF)
        return states.DELETED

    def prepare(self, task):
        """Prepare the deployment environment for this node.

        :param task: a TaskManager instance.
        """
        self._fetch_resources(task)

        # Temporarily set possibly missing driver_info fields. These changes
        # will not become persistent until someone does
        # node.driver_info = updated_driver_info
        # node.save()
        driver_info = task.node.driver_info
        for field, value in (
                ('deploy_kernel', CONF.bareon.deploy_kernel),
                ('deploy_ramdisk', CONF.bareon.deploy_ramdisk)):
            driver_info.setdefault(field, value)

        task.driver.boot.prepare_ramdisk(task,
                                         self._build_pxe_config_options(task))

    def clean_up(self, task):
        """Clean up the deployment environment for this node.

        If preparation of the deployment environment ahead of time is possible,
        this method should be implemented by the driver. It should erase
        anything cached by the `prepare` method.

        If implemented, this method must be idempotent. It may be called
        multiple times for the same node on the same conductor, and it may be
        called by multiple conductors in parallel. Therefore, it must not
        require an exclusive lock.

        This method is called before `tear_down`.

        :param task: a TaskManager instance.
        """
        task.driver.boot.clean_up_ramdisk(task)
        _clean_up_images(task)

    def take_over(self, task):
        pass

    def _fetch_resources(self, task):
        self._fetch_provision_json(task)
        self._fetch_actions(task)

    def _fetch_provision_json(self, task):
        config = self._get_deploy_config(task)
        config = self._add_image_deployment_config(task, config)

        deploy_data = config.get('deploy_data', {})
        if 'kernel_params' not in deploy_data:
            deploy_data['kernel_params'] = CONF.bareon.bareon_pxe_append_params
        config['deploy_data'] = deploy_data

        LOG.info('[{0}] Resulting provision.json is:\n{1}'.format(
            task.node.uuid, config))

        # The on_fail script is not passed to the agent; it is handled on
        # the Conductor.
        on_fail_script_url = config.pop("on_fail_script", None)
        self._fetch_on_fail_script(task, on_fail_script_url)

        filename = get_provision_json_path(task.node)
        LOG.info('[{0}] Writing provision.json to:\n{1}'.format(
            task.node.uuid, filename))
        with open(filename, 'w') as f:
            f.write(json.dumps(config))

    def _validate_deployment_config(self, task):
        data_driver_name = bareon_utils.node_data_driver(task.node)
        validator = self._get_deployment_config_validator(data_driver_name)
        validator(get_provision_json_path(task.node))

    def _get_deploy_config(self, task):
        node = task.node
        instance_info = node.instance_info

        # Get options passed by nova, if any.
        deploy_config_options = instance_info.get('deploy_config_options', {})
        # Get options available at ironic side.
        deploy_config_options['node'] = node.driver_info.get('deploy_config')
        deploy_config_options['conf'] = CONF.bareon.deploy_config
        # Clean out empty options.
        deploy_config_options = {k: v for k, v in
                                 six.iteritems(deploy_config_options) if v}

        configs = self._fetch_deploy_configs(task.context, node,
                                             deploy_config_options)
        return self._merge_configs(configs)

    def _fetch_deploy_configs(self, context, node, cfg_options):
        configs = {}
        for key, url in six.iteritems(cfg_options):
            configs[key] = resources.url_download_json(context, node,
                                                       url)
        return configs

    @staticmethod
    def _merge_configs(configs):
        # Merge first-level attributes of the configs according to priority.
        priority_list = CONF.bareon.deploy_config_priority.split(':')
        unknown_sources = set(priority_list) - {'instance', 'node', 'conf',
                                                'image'}
        if unknown_sources:
            raise ValueError('Unknown deploy config source %s' % str(
                unknown_sources))

        result = {}
        for k in priority_list[::-1]:
            if k in configs:
                result.update(configs[k])
        LOG.debug('Resulting deploy config:')
        LOG.debug('%s', result)
        return result
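
    # A small worked example of the merge above (illustrative only; the keys
    # and values are made up). With the default priority
    # 'instance:node:image:conf' and
    #     configs = {'conf': {'partitions': 'A', 'on_fail_script': 'X'},
    #                'node': {'partitions': 'B'}}
    # the list is walked from lowest to highest priority, so 'node'
    # overwrites 'conf' and the result is
    #     {'partitions': 'B', 'on_fail_script': 'X'}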

    def _fetch_on_fail_script(self, task, url):
        if not url:
            return
        path = get_on_fail_script_path(task.node)
        LOG.info('[{0}] Fetching on_fail_script to:\n{1}'.format(
            task.node.uuid, path))
        resources.url_download(task.context, task.node, url, path)

    def _fetch_actions(self, task):
        driver_actions_url = task.node.instance_info.get('driver_actions')
        actions_data = resources.url_download_json(task.context,
                                                   task.node,
                                                   driver_actions_url)
        if not actions_data:
            LOG.info("[%s] No driver_actions specified" % task.node.uuid)
            return

        controller = actions.ActionController(task, actions_data)
        controller.fetch_action_resources()

        actions_data = controller.to_dict()
        LOG.info('[{0}] Deploy actions for the node are:\n{1}'.format(
            task.node.uuid, actions_data))

        filename = get_actions_json_path(task.node)
        LOG.info('[{0}] Writing actions.json to:\n{1}'.format(
            task.node.uuid, filename))
        with open(filename, 'w') as f:
            f.write(json.dumps(actions_data))

    def _build_pxe_config_options(self, task):
        """Builds the pxe config options for booting agent.

        This method builds the config options to be replaced on
        the agent pxe config template.

        :param task: a TaskManager instance
        :returns: a dict containing the options to be applied on
            the agent pxe config template.
        """
        agent_config_opts = {
            'deployment_id': task.node.uuid,
            'ironic_api_url': deploy_utils.get_ironic_api_url(),
        }

        return agent_config_opts

    def _get_image_resource_mode(self):
        raise NotImplementedError

    def _get_deploy_driver(self):
        raise NotImplementedError

    def _add_image_deployment_config(self, task, provision_config):
        node = task.node
        bareon_utils.change_node_dict(
            node, 'instance_info',
            {'deploy_driver': self._get_deploy_driver()})
        node.save()

        image_resource_mode = self._get_image_resource_mode()
        boot_image = node.instance_info['image_source']
        default_user_images = [
            {
                'name': boot_image,
                'url': boot_image,
                'target': "/",
            }
        ]
        user_images = provision_config.get('images', default_user_images)

        invalid_images = []
        origin_names = [None] * len(user_images)
        for idx, image in enumerate(user_images):
            try:
                bareon_utils.validate_json(('name', 'url'), image)
            except exception.MissingParameterValue as e:
                invalid_images.append(
                    'Invalid "image" record - there is no key {key} (#{idx}: '
                    '{payload})'.format(
                        key=e, idx=idx, payload=json.dumps(image)))
                continue

            origin_names[idx] = image['name']
            image_uuid, image_name = image_service.get_glance_image_uuid_name(
                task, image['url'])
            image['boot'] = (boot_image == image_uuid)
            image['url'] = "glance:%s" % image_uuid
            image['mode'] = image_resource_mode
            image['image_uuid'] = image_uuid
            image['image_name'] = image_name

        if invalid_images:
            raise exception.InvalidParameterValue(
                err='\n'.join(invalid_images))

        fetched_image_resources = self._fetch_images(task, user_images)

        image_deployment_config = [
            {
                # Grab the name from the source data to keep it untouched,
                # because the "resources" subsystem replaces all
                # non-alphanumeric symbols in the 'name' field with
                # underscores.
                'name': name,
                'image_pull_url': image.pull_url,
                'target': image.target,
                'boot': image.boot,
                'image_uuid': image.image_uuid,
                'image_name': image.image_name
            }
            for name, image in zip(origin_names, fetched_image_resources)
        ]

        bareon_utils.change_node_dict(
            task.node, 'instance_info',
            {'multiboot': len(image_deployment_config) > 1})
        node.save()

        provision_config['images'] = image_deployment_config
        return provision_config

    def _fetch_images(self, task, image_resources):
        images = resources.ResourceList({
            "name": "tenant_images",
            "resources": image_resources
        }, task)
        images.fetch_resources()

        # NOTE(lobur): serialize tenant images json for further cleanup.
        images_json = images.to_dict()
        with open(get_tenant_images_json_path(task.node), 'w') as f:
            f.write(json.dumps(images_json))

        return images.resources

    def terminate_deployment(self, task):
        node = task.node
        if TERMINATE_FLAG not in node.instance_info:

            def _wait_for_node_to_become_terminated(retries, max_retries,
                                                    task):
                task_node = task.node
                retries[0] += 1
                if retries[0] > max_retries:
                    bareon_utils.change_node_dict(
                        task_node, 'instance_info',
                        {TERMINATE_FLAG: 'failed'})
                    task_node.reservation = None
                    task_node.save()

                    raise bareon_exception.RetriesException(
                        retry_count=max_retries)

                current_node = db_node.Node.get_by_uuid(task.context,
                                                        task_node.uuid)
                if current_node.instance_info.get(TERMINATE_FLAG) == 'done':
                    raise loopingcall.LoopingCallDone()

            bareon_utils.change_node_dict(
                node, 'instance_info',
                {TERMINATE_FLAG: 'requested'})
            node.save()

            retries = [0]
            interval = CONF.bareon.check_terminate_interval
            max_retries = CONF.bareon.check_terminate_max_retries

            timer = loopingcall.FixedIntervalLoopingCall(
                _wait_for_node_to_become_terminated,
                retries, max_retries, task)
            try:
                timer.start(interval=interval).wait()
            except bareon_exception.RetriesException as ex:
                LOG.error('Failed to terminate node. Error: %(error)s' % {
                    'error': ex})

    @property
    def can_terminate_deployment(self):
        return True

    def _get_deployment_config_validator(self, driver_name):
        try:
            validator = self._deployment_config_validators[driver_name]
        except KeyError:
            validator = DeploymentConfigValidator(driver_name)
            self._deployment_config_validators[driver_name] = validator
        return validator


class BareonVendor(base.VendorInterface):
    def get_properties(self):
        """Return the properties of the interface.

        :returns: dictionary of <property name>:<property description> entries.
        """
        return COMMON_PROPERTIES

    def validate(self, task, method=None, **kwargs):
        """Validate the driver-specific Node deployment info.

        :param task: a TaskManager instance
        :param method: method to be validated
        """
        if method in ('exec_actions', 'deploy_steps'):
            return

        if method == 'switch_boot':
            self.validate_switch_boot(task, **kwargs)
            return

        if not kwargs.get('address'):
            raise exception.MissingParameterValue(_('Bareon must pass '
                                                    'address of a node.'))
        _NodeDriverInfoAdapter(task.node)

    def validate_switch_boot(self, task, **kwargs):
        if not kwargs.get('image'):
            raise exception.MissingParameterValue(_('No image info passed.'))
        if not kwargs.get('ssh_key'):
            raise exception.MissingParameterValue(_('No ssh key info passed.'))
        if not kwargs.get('ssh_user'):
            raise exception.MissingParameterValue(_('No ssh user info '
                                                    'passed.'))

    @base.passthru(['GET', 'POST'], async=False)
    def deploy_steps(self, task, **data):
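        """Serve the "deploy steps" exchange with the bareon ramdisk.

        A GET returns the next step for the agent; currently the only step
        is the SSH key injection request built by _InjectSSHKeyStepRequest,
        e.g. (a sketch; the actual key material comes from driver_info):

            {'name': 'inject-ssh-keys',
             'payload': {'ssh-keys': {'root': ['<public key>']}}}

        A POST reports the result of a step back; the body is expected to
        carry 'name', 'status', 'status-details' and 'payload' keys (see
        _DeployStepsAdapter) and is answered with {'url': None}.
        """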
        http_method = data.pop('http_method')
        driver_info = _NodeDriverInfoAdapter(task.node)

        if http_method == 'GET':
            ssh_keys_step = _InjectSSHKeyStepRequest(task, driver_info)
            return ssh_keys_step()

        steps_mapping = _DeployStepMapping()
        data = _DeployStepsAdapter(data)
        try:
            request_cls = steps_mapping.name_to_step[data.action]
        except KeyError:
            if data.action is not None:
                raise RuntimeError(
                    'There is no name mapping for deployment step: '
                    '{!r}'.format(data.action))

            message = (
                'Bareon\'s callback service has failed with an internal error')
            if data.status_details:
                message += '\nFailure details: {}'.format(
                    pprint.pformat(data.status_details))
            # TODO(dbogun): add support for existing log extraction mechanism
            deploy_utils.set_failed_state(
                task, message, collect_logs=False)
        else:
            handler = request_cls.result_handler(
                task, driver_info, data)
            handler()

        return {'url': None}

    @base.passthru(['POST'])
    @task_manager.require_exclusive_lock
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node."""
        node = task.node
        task.process_event('resume')

        driver_info = _NodeDriverInfoAdapter(task.node)

        cmd = '{} --data_driver "{}" --deploy_driver "{}"'.format(
            driver_info.entry_point, bareon_utils.node_data_driver(node),
            node.instance_info['deploy_driver'])
        if CONF.debug:
            cmd += ' --debug'
        instance_info = node.instance_info

        connect_args = {
            'username': driver_info.ssh_login,
            'key_filename': driver_info.ssh_key,
            'host': kwargs['address']}
        if driver_info.ssh_port:
            connect_args['port'] = driver_info.ssh_port

        try:
            ssh = bareon_utils.get_ssh_connection(task, **connect_args)
            sftp = ssh.open_sftp()

            self._check_bareon_version(ssh, node.uuid)

            provision_config_path = get_provision_json_path(task.node)
            # TODO(yuriyz) no hardcode
            sftp.put(provision_config_path, '/tmp/provision.json')

            # swift configdrive store should be disabled
            configdrive = instance_info.get('configdrive')
            if configdrive is not None:
                # TODO(yuriyz) no hardcode
                bareon_utils.sftp_write_to(sftp, configdrive,
                                           '/tmp/config-drive.img')

            out, err = self._deploy(task, ssh, cmd, **connect_args)
            LOG.info(_LI('[%(node)s] Bareon pass on node %(node)s'),
                     {'node': node.uuid})
            LOG.debug('[%s] Bareon stdout is: "%s"', node.uuid, out)
            LOG.debug('[%s] Bareon stderr is: "%s"', node.uuid, err)

            self._get_boot_info(task, ssh)

            self._run_actions(task, ssh, sftp, connect_args)

            manager_utils.node_power_action(task, states.POWER_OFF)
            manager_utils.node_set_boot_device(task, boot_devices.DISK,
                                               persistent=True)
            manager_utils.node_power_action(task, states.POWER_ON)

        except exception.SSHConnectFailed as e:
            msg = (
                _('[%(node)s] SSH connect to node %(host)s failed. '
                  'Error: %(error)s') % {'host': connect_args['host'],
                                         'error': e, 'node': node.uuid})
            self._deploy_failed(task, msg)

        except exception.ConfigInvalid as e:
            msg = (_('[%(node)s] Invalid provision config. '
                     'Error: %(error)s') % {'error': e, 'node': node.uuid})
            self._deploy_failed(task, msg)

        except bareon_exception.DeployTerminationSucceed:
            LOG.info(_LI('[%(node)s] Deployment was terminated'),
                     {'node': node.uuid})

        except Exception as e:
            self._run_on_fail_script(task, sftp, ssh)

            msg = (_('[%(node)s] Deploy failed for node %(node)s. '
                     'Error: %(error)s') % {'node': node.uuid, 'error': e})
            self._bareon_log(task, ssh)
            self._deploy_failed(task, msg)

        else:
            task.process_event('done')
            LOG.info(_LI('Deployment to node %s done'), task.node.uuid)

        finally:
            self._clean_up_deployment_resources(task)

    def _deploy_failed(self, task, msg):
        LOG.error(msg)
        deploy_utils.set_failed_state(task, msg, collect_logs=False)

    def _check_bareon_version(self, ssh, node_uuid):
        try:
            stdout, stderr = processutils.ssh_execute(
                ssh, 'cat /etc/bareon-release')

            LOG.info(_LI("[{0}] Tracing Bareon version.\n{1}").format(
                node_uuid, stdout))

            version = ""
            lines = stdout.splitlines()
            if lines:
                version_line = lines[0]
                name, _, version = version_line.partition("==")
                if version.startswith(REQUIRED_BAREON_VERSION):
                    return

            msg = ("Bareon version '%(req)s' is required, but version "
                   "'%(found)s' found on the ramdisk."
                   % dict(req=REQUIRED_BAREON_VERSION,
                          found=version))
            raise bareon_exception.IncompatibleRamdiskVersion(details=msg)
        except processutils.ProcessExecutionError:
            msg = "Bareon version cannot be read on the ramdisk."
            raise bareon_exception.IncompatibleRamdiskVersion(details=msg)

    def _get_boot_info(self, task, ssh):
        node = task.node
        node_uuid = node.uuid

        if not node.instance_info.get('multiboot', False):
            return
        try:
            stdout, stderr = processutils.ssh_execute(
                ssh, 'cat /tmp/boot_entries.json')
        except processutils.ProcessExecutionError as exec_err:
            LOG.warning(_LI('[%(node)s] Error getting boot info. '
                            'Error: %(error)s') % {'node': node_uuid,
                                                   'error': exec_err})
            raise
        else:
            multiboot_info = json.loads(stdout)
            bareon_utils.change_node_dict(node, 'instance_info', {
                'multiboot_info': multiboot_info
            })
            LOG.info("[{1}] {0} Multiboot info {0}\n{2}"
                     "\n".format("#" * 20, node_uuid, multiboot_info))

    def _run_actions(self, task, ssh, sftp, sshparams):
        actions_path = get_actions_json_path(task.node)
        if not os.path.exists(actions_path):
            LOG.info(_LI("[%(node)s] No actions specified. Skipping")
                     % {'node': task.node.uuid})
            return

        with open(actions_path) as f:
            actions_data = json.loads(f.read())
        actions_controller = actions.ActionController(
            task, actions_data
        )

        actions_controller.execute(ssh, sftp, **sshparams)

    def _bareon_log(self, task, ssh):
        node_uuid = task.node.uuid
        try:
            # TODO(oberezovskyi): Change the log pulling mechanism (e.g. use
            # the remote logging feature of syslog)
            stdout, stderr = processutils.ssh_execute(
                ssh, 'cat /var/log/bareon.log')
        except processutils.ProcessExecutionError as exec_err:
            LOG.warning(_LI('[%(node)s] Error getting Bareon log. '
                            'Error: %(error)s') % {'node': node_uuid,
                                                   'error': exec_err})
        else:
            LOG.info("[{1}] {0} Start Bareon log {0}\n{2}\n"
                     "[{1}] {0} End Bareon log {0}".format("#" * 20,
                                                           node_uuid,
                                                           stdout))

    def _run_on_fail_script(self, task, sftp, ssh):
        node = task.node
        node_uuid = node.uuid
        try:
            on_fail_script_path = get_on_fail_script_path(node)
            if not os.path.exists(on_fail_script_path):
                LOG.info(_LI("[%(node)s] No on_fail_script passed. Skipping")
                         % {'node': node_uuid})
                return

            LOG.debug(_LI('[%(node)s] Uploading on_fail script to the node.'),
                      {'node': node_uuid})
            sftp.put(on_fail_script_path, '/tmp/bareon_on_fail.sh')

            LOG.debug("[%(node)s] Executing on_fail_script."
                      % {'node': node_uuid})
            out, err = processutils.ssh_execute(
                ssh, "bash %s" % '/tmp/bareon_on_fail.sh')

        except processutils.ProcessExecutionError as ex:
            LOG.warning(_LI('[%(node)s] Error executing OnFail script. '
                            'Error: %(er)s') % {'node': node_uuid, 'er': ex})

        except exception.SSHConnectFailed as ex:
            LOG.warning(_LI('[%(node)s] SSH connection error. '
                            'Error: %(er)s') % {'node': node_uuid, 'er': ex})

        except Exception as ex:
            LOG.warning(_LI('[%(node)s] Unknown error. '
                            'Error: %(error)s') % {'node': node_uuid,
                                                   'error': ex})
        else:
            LOG.info(
                "{0} [{1}] on_fail script result below {0}".format("#" * 40,
                                                                   node_uuid))
            LOG.info(out)
            LOG.info(err)
            LOG.info("{0} [{1}] End on_fail script "
                     "result {0}".format("#" * 40, node_uuid))

    def _clean_up_deployment_resources(self, task):
        _clean_up_images(task)
        self._clean_up_actions(task)

    def _clean_up_actions(self, task):
        filename = get_actions_json_path(task.node)
        if not os.path.exists(filename):
            return

        with open(filename) as f:
            actions_data = json.loads(f.read())

        controller = actions.ActionController(task, actions_data)
        controller.cleanup_action_resources()

    @base.passthru(['POST'])
    @task_manager.require_exclusive_lock
    def exec_actions(self, task, **kwargs):
        actions_json = resources.url_download_json(
            task.context, task.node, kwargs.get('driver_actions'))
        if not actions_json:
            LOG.info("[%s] No driver_actions specified." % task.node.uuid)
            return

        ssh_user = actions_json.pop('action_user')
        ssh_key_url = actions_json.pop('action_key')
        node_ip = bareon_utils.get_node_ip(task)

        controller = actions.ActionController(task, actions_json)
        controller.ssh_and_execute(node_ip, ssh_user, ssh_key_url)

    def _execute_deploy_script(self, task, ssh, cmd, *args, **kwargs):
        # NOTE(oberezovskyi): minutes to seconds
        timeout = CONF.bareon.deploy_timeout * 60
        LOG.debug('[%s] Running cmd (SSH): %s', task.node.uuid, cmd)
        try:
            out, err = bareon_utils.ssh_execute(ssh, cmd, timeout=timeout,
                                                check_exit_code=True)
        except exception.SSHCommandFailed as err:
            LOG.debug('[%s] Deploy script execute failed: "%s"',
                      task.node.uuid, err)
            raise bareon_exception.DeploymentTimeout(timeout=timeout)
        return out, err

    def _deploy(self, task, ssh, cmd, **params):
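        # Run the deploy script in a background greenthread and poll every
        # few seconds: finish when the thread exits, or kill it if another
        # conductor thread has set instance_info[TERMINATE_FLAG] to
        # 'requested' (see terminate_deployment), acknowledging with 'done'.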
        deployment_thread = eventlet.spawn(self._execute_deploy_script,
                                           task, ssh, cmd, **params)

        def _wait_for_deployment_finished(task, thread):
            task_node = task.node
            current_node = db_node.Node.get_by_uuid(task.context,
                                                    task_node.uuid)

            # NOTE(oberezovskyi): greenthreads have no way to check whether
            # the thread has already finished, so we need to access a
            # private variable.
            if thread._exit_event.ready():
                raise loopingcall.LoopingCallDone()

            if (current_node.instance_info.get(TERMINATE_FLAG,
                                               '') == 'requested'):
                thread.kill()
                bareon_utils.change_node_dict(
                    task_node, 'instance_info',
                    {TERMINATE_FLAG: 'done'})
                task_node.save()
                raise bareon_exception.DeployTerminationSucceed()

        timer = loopingcall.FixedIntervalLoopingCall(
            _wait_for_deployment_finished, task, deployment_thread)
        timer.start(interval=5).wait()
        return deployment_thread.wait()

    @base.passthru(['POST'], async=False)
    @task_manager.require_exclusive_lock
    def switch_boot(self, task, **kwargs):
        # NOTE(oberezovskyi): exception messages should not be changed
        # because of hardcode in the nova-ironic driver.
        image = kwargs.get('image')
        LOG.info('[{0}] Attempt to switch boot to {1} '
                 'image'.format(task.node.uuid, image))

        msg = ""
        try:
            if not task.node.instance_info.get('multiboot', False):
                msg = "[{}] Non-multiboot deployment".format(task.node.uuid)
                raise exception.IronicException(message=msg, code=400)

            boot_info = task.node.instance_info.get('multiboot_info',
                                                    {'elements': []})

            grub_id = next((element['grub_id']
                            for element in boot_info['elements']
                            if (element['image_uuid'] == image or
                                element['image_name'] == image)), None)

            if grub_id is None:
                msg = ('[{}] Can\'t find desired multiboot '
                       'image'.format(task.node.uuid))
                raise exception.IronicException(message=msg, code=400)

            elif grub_id == boot_info.get('current_element', None):
                msg = ('[{}] Already in desired boot '
                       'device.'.format(task.node.uuid))
                raise exception.IronicException(message=msg, code=400)

            node_ip = bareon_utils.get_node_ip(task)
            ssh_key = resources.url_download_raw_secured(task.context,
                                                         task.node,
                                                         kwargs['ssh_key'])
            ssh = bareon_utils.get_ssh_connection(task, **{
                'host': node_ip,
                'username': kwargs['ssh_user'],
                'key_contents': ssh_key
            })

            tmp_path = processutils.ssh_execute(ssh, 'mktemp -d')[0].split()[0]
            cfg_path = os.path.join(tmp_path, 'boot', 'grub2', 'grub.cfg')
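
            # Mount the multiboot partition by UUID, point the "set default="
            # line in grub.cfg at the chosen entry, then unmount and remove
            # the temporary mount point.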
            commands = [
                'mount /dev/disk/by-uuid/{} {}'.format(
                    boot_info['multiboot_partition'],
                    tmp_path),
                "sed -i 's/\(set default=\)[0-9]*/\\1{}/' {}".format(grub_id,
                                                                     cfg_path),
                'umount {}'.format(tmp_path),
                'rmdir {}'.format(tmp_path)
            ]

            map(lambda cmd: processutils.ssh_execute(ssh, 'sudo ' + cmd),
                commands)

        except exception.SSHConnectFailed as e:
            msg = (
                _('[%(node)s] SSH connect to node %(host)s failed. '
                  'Error: %(error)s') % {'host': node_ip, 'error': e,
                                         'node': task.node.uuid})
            raise exception.IronicException(message=msg, code=400)

        except exception.IronicException as e:
            msg = str(e)
            raise

        except Exception as e:
            msg = (_('[%(node)s] Multiboot switch failed for node %(node)s. '
                     'Error: %(error)s') % {'node': task.node.uuid,
                                            'error': e})
            raise exception.IronicException(message=msg, code=400)

        else:
            boot_info['current_element'] = grub_id
            bareon_utils.change_node_dict(
                task.node, 'instance_info',
                {'multiboot_info': boot_info})
            task.node.save()

        finally:
            if msg:
                LOG.error(msg)
                task.node.last_error = msg
                task.node.save()


class DeploymentConfigValidator(object):
    _driver = None
    _namespace = 'bareon.drivers.data'
    _min_version = pkg_resources.parse_version('0.0.2')

    def __init__(self, driver_name):
        self.driver_name = driver_name

        LOG.debug('Loading bareon data-driver "%s"', self.driver_name)
        try:
            manager = stevedore.driver.DriverManager(
                self._namespace, self.driver_name, verify_requirements=True)
            extension = manager[driver_name]
            version = extension.entry_point.dist.version
            version = pkg_resources.parse_version(version)
            LOG.info('Driver %s-%s loaded', extension.name, version)

            if version < self._min_version:
                raise RuntimeError(
                    'bareon version less than {} does not support '
                    'deployment config validation'.format(self._min_version))
        except RuntimeError as e:
            LOG.warning(
                'Failed to load bareon data-driver "%s": %s',
                self.driver_name, e)
            return

        self._driver = manager.driver

    def __call__(self, deployment_config):
        if self._driver is None:
            LOG.info(
                'Skipping deployment config validation due to a problem in '
                'loading the bareon data driver')
            return

        try:
            with open(deployment_config, 'rt') as stream:
                payload = json.load(stream)
            self._driver.validate_data(payload)
        except (IOError, ValueError, TypeError) as e:
            raise exception.InvalidParameterValue(
                'Unable to load deployment config "{}": {}'.format(
                    deployment_config, e))
        except self._driver.exc.WrongInputDataError as e:
            raise exception.InvalidParameterValue(
                'Deployment config has failed validation.\n'
                '{0.message}'.format(e))


def get_provision_json_path(node):
    return os.path.join(resources.get_node_resources_dir(node),
                        "provision.json")


def get_actions_json_path(node):
    return os.path.join(resources.get_node_resources_dir(node),
                        "actions.json")


def get_on_fail_script_path(node):
    return os.path.join(resources.get_node_resources_dir(node),
                        "on_fail_script.sh")


def get_tenant_images_json_path(node):
    return os.path.join(resources.get_node_resources_dir(node),
                        "tenant_images.json")


class _AbstractAdapter(object):
    def __init__(self, data):
        self._raw = data

    def _extract_fields(self, mapping):
        for attr, name in mapping:
            try:
                value = self._raw[name]
            except KeyError:
                continue
            setattr(self, attr, value)


class _DeployStepsAdapter(_AbstractAdapter):
    action = action_payload = None
    status = status_details = None

    def __init__(self, data):
        super(_DeployStepsAdapter, self).__init__(data)

        self._extract_fields({
            'action': 'name',
            'status': 'status'}.items())
        self.action_payload = self._raw.get('payload', {})
        self.status_details = self._raw.get('status-details', '')


# TODO(dbogun): handle all driver_info keys
class _NodeDriverInfoAdapter(_AbstractAdapter):
    _exc_prefix = 'driver_info: '

    ssh_port = None
    # TODO(dbogun): check for an API way to define access defaults
    ssh_login = 'root'
    ssh_key = '/etc/ironic/bareon_key'
    ssh_key_pub = None
    entry_point = 'bareon-provision'

    def __init__(self, node):
        super(_NodeDriverInfoAdapter, self).__init__(node.driver_info)
        self.node = node

        self._extract_fields({
            'ssh_port': 'bareon_ssh_port',
            'ssh_key': 'bareon_key_filename',
            'ssh_key_pub': 'bareon_public_key_filename',
            'ssh_login': 'bareon_username',
            'entry_point': 'bareon_deploy_script'}.items())
        self._process()
        self._validate()

    def _process(self):
        if self.ssh_key_pub is None:
            self.ssh_key_pub = '{}.pub'.format(self.ssh_key)

        if self.ssh_port is not None:
            self.ssh_port = int(self.ssh_port)
            if not 0 < self.ssh_port < 65536:
                raise exception.InvalidParameterValue(
                    '{}Invalid SSH port number({}) is outside of allowed '
                    'range.'.format(self._exc_prefix, 'bareon_ssh_port'))

    def _validate(self):
        self._validate_ssh_key()

    def _validate_ssh_key(self):
        missing = []
        pkey_stats = None
        for idx, target in enumerate((self.ssh_key, self.ssh_key_pub)):
            try:
                target_stat = os.stat(target)
                if not idx:
                    pkey_stats = target_stat
            except OSError as e:
                missing.append(e)

        missing = ['{0.filename}: {0.strerror}'.format(x) for x in missing]
        if missing:
            raise exception.InvalidParameterValue(
                '{}Unable to use SSH key:\n{}'.format(
                    self._exc_prefix, '\n'.join(missing)))

        issue = None
        if not stat.S_ISREG(pkey_stats.st_mode):
            issue = 'SSH private key {!r} is not a regular file.'.format(
                self.ssh_key)
        if pkey_stats.st_mode & 0o177:
            issue = 'Permissions {} for {!r} are too open.'.format(
                oct(pkey_stats.st_mode & 0o777), self.ssh_key)

        if issue:
            raise exception.InvalidParameterValue(issue)


@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepHandler(object):
    def __init__(self, task, driver_info):
        self.task = task
        self.driver_info = driver_info

    @abc.abstractmethod
    def __call__(self):
        pass


@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepResult(_AbstractDeployStepHandler):
    def __init__(self, task, driver_info, step_info):
        super(_AbstractDeployStepResult, self).__init__(task, driver_info)
        self.step_info = step_info

    def __call__(self):
        if not self.step_info.status:
            self._handle_error()
            return

        return self._handle()

    @abc.abstractmethod
    def _handle(self):
        pass

    def _handle_error(self):
        message = 'Deployment step "{}" has failed: {}'.format(
            self.step_info.action, self.step_info.status_details)
        # TODO(dbogun): add support for existing log extraction mechanism
        deploy_utils.set_failed_state(self.task, message, collect_logs=False)


@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepRequest(_AbstractDeployStepHandler):
    @abc.abstractproperty
    def name(self):
        pass

    @abc.abstractproperty
    def result_handler(self):
        pass

    def __call__(self):
        payload = self._handle()
        return {
            'name': self.name,
            'payload': payload}

    @abc.abstractmethod
    def _handle(self):
        pass


class _InjectSSHKeyStepResult(_AbstractDeployStepResult):
    def _handle(self):
        pass


class _InjectSSHKeyStepRequest(_AbstractDeployStepRequest):
    name = 'inject-ssh-keys'
    result_handler = _InjectSSHKeyStepResult

    def _handle(self):
        try:
            with open(self.driver_info.ssh_key_pub) as data:
                ssh_key = data.read()
        except IOError as e:
            raise bareon_exception.DeployTaskError(
                name=type(self).__name__, details=e)

        return {
            'ssh-keys': {
                self.driver_info.ssh_login: [ssh_key]}}


class _DeployStepMapping(object):
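    """Registry of the available deploy step request classes.

    The mapping is built by reflection: every subclass of
    _AbstractDeployStepRequest defined in this module (other than the base
    class itself) is collected and indexed by its `name` attribute, so
    adding a new step only requires defining a new request/result class pair.
    """
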
    def __init__(self):
        self.steps = []

        base_cls = _AbstractDeployStepRequest
        target = sys.modules[__name__]
        for name in dir(target):
            value = getattr(target, name)
            if (inspect.isclass(value)
                    and issubclass(value, base_cls)
                    and value is not base_cls):
                self.steps.append(value)

        self.name_to_step = {}
        self.step_to_name = {}
        for task in self.steps:
            self.name_to_step[task.name] = task
            self.step_to_name[task] = task.name