
Add a set of tools that form an API for communicating with the bareon instance. This API uses Ironic's vendor_passthru mechanism to "catch" requests sent from the bareon instance to the Ironic API, so the bareon-ironic driver can receive bareon instance requests. This communication channel already exists: previously it was used only to receive the notification from the bareon instance about a successful node boot. It is now extended so that the bareon-ironic driver can send "generic" tasks (steps) to the bareon instance. Currently only one step is implemented: injecting an SSH key into the bareon instance. This new "steps" interface makes it possible to drop the preinstalled SSH key from the bareon instance right away, and in the future it should allow dropping SSH communication between bareon-ironic and the bareon instance entirely.

Change-Id: I0791807c7cb3dba70c71c4f46e5eddf01da76cdd
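The exchange over this channel looks roughly as follows (an illustrative sketch based on the handlers in this patch, not a formal API description; the URL form assumes Ironic's usual vendor_passthru routing):

    GET  /v1/nodes/<uuid>/vendor_passthru?method=deploy_steps
         -> next step for the agent, e.g.
         {'name': 'inject-ssh-keys',
          'payload': {'ssh-keys': {'root': ['<contents of bareon_key.pub>']}}}

    POST /v1/nodes/<uuid>/vendor_passthru?method=deploy_steps
         <- step result from the agent, e.g.
         {'name': 'inject-ssh-keys', 'status': True,
          'status-details': '', 'payload': {}}
         answered by the conductor with {'url': None}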
#
# Copyright 2017 Cray Inc., All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
Bareon deploy driver.
"""

import abc
import inspect
import json
import os
import pprint
import stat
import sys

import eventlet
import pkg_resources
import stevedore
import six
from oslo_concurrency import processutils
from oslo_config import cfg
from oslo_log import log
from oslo_service import loopingcall

from ironic.common import boot_devices
from ironic.common import exception
from ironic.common import states
from ironic.common.i18n import _
from ironic.common.i18n import _LI
from ironic.conductor import task_manager
from ironic.conductor import utils as manager_utils
from ironic.drivers import base
from ironic.drivers.modules import deploy_utils
from ironic.objects import node as db_node

from bareon_ironic.modules import bareon_exception
from bareon_ironic.modules import bareon_utils
from bareon_ironic.modules.resources import actions
from bareon_ironic.modules.resources import image_service
from bareon_ironic.modules.resources import resources

agent_opts = [
    cfg.StrOpt('bareon_pxe_append_params',
               default='nofb nomodeset vga=normal',
               help='Additional append parameters for baremetal PXE boot.'),
    cfg.StrOpt('deploy_kernel',
               help='UUID (from Glance) of the default deployment kernel.'),
    cfg.StrOpt('deploy_ramdisk',
               help='UUID (from Glance) of the default deployment ramdisk.'),
    cfg.StrOpt('deploy_config_priority',
               default='instance:node:image:conf',
               help='Priority for deploy config'),
    cfg.StrOpt('deploy_config',
               help='A uuid or name of glance image representing '
                    'deploy config.'),
    cfg.IntOpt('deploy_timeout',
               default=15,
               help="Timeout in minutes for the node continue-deploy process "
                    "(deployment phase following the callback)."),
    cfg.IntOpt('check_terminate_interval',
               help='Time interval in seconds to check whether the deployment '
                    'driver has responded to the termination signal',
               default=5),
    cfg.IntOpt('check_terminate_max_retries',
               help='Max retries to check whether the node is already '
                    'terminated',
               default=20),
    cfg.StrOpt('agent_data_driver',
               default='ironic',
               help='Bareon data driver'),
]
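# These options live in the [bareon] group of the Ironic configuration (see
# the register_opts() call below). A minimal illustrative snippet -- the
# values here are placeholders, not defaults shipped with the driver:
#
#     [bareon]
#     deploy_kernel = <glance-uuid-of-deploy-kernel>
#     deploy_ramdisk = <glance-uuid-of-deploy-ramdisk>
#     deploy_config_priority = instance:node:image:conf
#     deploy_timeout = 15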

CONF = cfg.CONF
CONF.register_opts(agent_opts, group='bareon')

LOG = log.getLogger(__name__)

REQUIRED_PROPERTIES = {}
OTHER_PROPERTIES = {
    'deploy_kernel': _('UUID (from Glance) of the deployment kernel.'),
    'deploy_ramdisk': _('UUID (from Glance) of the deployment ramdisk.'),
    'bareon_username': _('SSH username; default is "root". Optional.'),
    'bareon_key_filename': _('Name of SSH private key file; default is '
                             '"/etc/ironic/bareon_key". Optional.'),
    'bareon_ssh_port': _('SSH port; default is 22. Optional.'),
    'bareon_deploy_script': _('Path to the bareon executable entry point; '
                              'default is "bareon-provision". Optional.'),
    'deploy_config': _('Deploy config Glance image id/name'),
}
COMMON_PROPERTIES = OTHER_PROPERTIES

REQUIRED_BAREON_VERSION = "0.0."

TERMINATE_FLAG = 'terminate_deployment'


def _clean_up_images(task):
    node = task.node
    if node.instance_info.get('images_cleaned_up', False):
        return
    try:
        with open(get_tenant_images_json_path(node)) as f:
            images_json = json.loads(f.read())
    except Exception as ex:
        LOG.warning("Cannot find tenant_images.json for the %s node to "
                    "finish cleanup." % node)
        LOG.warning(str(ex))
    else:
        images = resources.ResourceList.from_dict(images_json, task)
        images.cleanup_resources()
        bareon_utils.change_node_dict(task.node, 'instance_info',
                                      {'images_cleaned_up': True})


class BareonDeploy(base.DeployInterface):
    """Interface for deploy-related actions."""

    def __init__(self):
        super(BareonDeploy, self).__init__()
        self._deployment_config_validators = {}

    def get_properties(self):
        """Return the properties of the interface.

        :returns: dictionary of <property name>:<property description> entries.
        """
        return COMMON_PROPERTIES

    def validate(self, task):
        """Validate the driver-specific Node deployment info.

        This method validates whether the properties of the supplied node
        contain the required information for this driver to deploy images to
        the node.

        :param task: a TaskManager instance
        :raises: MissingParameterValue
        """
        _NodeDriverInfoAdapter(task.node)
        self._validate_deployment_config(task)

    @task_manager.require_exclusive_lock
    def deploy(self, task):
        """Perform a deployment to a node.

        Perform the necessary work to deploy an image onto the specified node.
        This method will be called after prepare(), which may have already
        performed any preparatory steps, such as pre-caching some data for the
        node.

        :param task: a TaskManager instance.
        :returns: status of the deploy. One of ironic.common.states.
        """
        manager_utils.node_power_action(task, states.REBOOT)
        return states.DEPLOYWAIT

    @task_manager.require_exclusive_lock
    def tear_down(self, task):
        """Tear down a previous deployment on the task's node.

        :param task: a TaskManager instance.
        :returns: status of the deploy. One of ironic.common.states.
        """
        manager_utils.node_power_action(task, states.POWER_OFF)
        return states.DELETED

    def prepare(self, task):
        """Prepare the deployment environment for this node.

        :param task: a TaskManager instance.
        """
        self._fetch_resources(task)

        # Temporarily set possibly missing driver_info fields. These changes
        # will not become persistent until someone does
        # node.driver_info = updated_driver_info
        # node.save()
        driver_info = task.node.driver_info
        for field, value in (
                ('deploy_kernel', CONF.bareon.deploy_kernel),
                ('deploy_ramdisk', CONF.bareon.deploy_ramdisk)):
            driver_info.setdefault(field, value)

        task.driver.boot.prepare_ramdisk(task,
                                         self._build_pxe_config_options(task))

    def clean_up(self, task):
        """Clean up the deployment environment for this node.

        If preparation of the deployment environment ahead of time is possible,
        this method should be implemented by the driver. It should erase
        anything cached by the `prepare` method.

        If implemented, this method must be idempotent. It may be called
        multiple times for the same node on the same conductor, and it may be
        called by multiple conductors in parallel. Therefore, it must not
        require an exclusive lock.

        This method is called before `tear_down`.

        :param task: a TaskManager instance.
        """
        task.driver.boot.clean_up_ramdisk(task)
        _clean_up_images(task)

    def take_over(self, task):
        pass

    def _fetch_resources(self, task):
        self._fetch_provision_json(task)
        self._fetch_actions(task)

    def _fetch_provision_json(self, task):
        config = self._get_deploy_config(task)
        config = self._add_image_deployment_config(task, config)

        deploy_data = config.get('deploy_data', {})
        if 'kernel_params' not in deploy_data:
            deploy_data['kernel_params'] = CONF.bareon.bareon_pxe_append_params
        config['deploy_data'] = deploy_data

        LOG.info('[{0}] Resulting provision.json is:\n{1}'.format(
            task.node.uuid, config))

        # The on_fail script is not passed to the agent; it is handled on
        # the Conductor.
        on_fail_script_url = config.pop("on_fail_script", None)
        self._fetch_on_fail_script(task, on_fail_script_url)

        filename = get_provision_json_path(task.node)
        LOG.info('[{0}] Writing provision.json to:\n{1}'.format(
            task.node.uuid, filename))
        with open(filename, 'w') as f:
            f.write(json.dumps(config))

    def _validate_deployment_config(self, task):
        data_driver_name = bareon_utils.node_data_driver(task.node)
        validator = self._get_deployment_config_validator(data_driver_name)
        validator(get_provision_json_path(task.node))

    def _get_deploy_config(self, task):
        node = task.node
        instance_info = node.instance_info

        # Get options passed by nova, if any.
        deploy_config_options = instance_info.get('deploy_config_options', {})
        # Get options available at ironic side.
        deploy_config_options['node'] = node.driver_info.get('deploy_config')
        deploy_config_options['conf'] = CONF.bareon.deploy_config
        # Clean out empty options.
        deploy_config_options = {k: v for k, v in
                                 six.iteritems(deploy_config_options) if v}

        configs = self._fetch_deploy_configs(task.context, node,
                                             deploy_config_options)
        return self._merge_configs(configs)

    def _fetch_deploy_configs(self, context, node, cfg_options):
        configs = {}
        for key, url in six.iteritems(cfg_options):
            configs[key] = resources.url_download_json(context, node,
                                                       url)
        return configs

    @staticmethod
    def _merge_configs(configs):
        # Merge first-level attributes of the configs according to priority.
        priority_list = CONF.bareon.deploy_config_priority.split(':')
        unknown_sources = set(priority_list) - {'instance', 'node', 'conf',
                                                'image'}
        if unknown_sources:
            raise ValueError('Unknown deploy config source %s' % str(
                unknown_sources))

        result = {}
        for k in priority_list[::-1]:
            if k in configs:
                result.update(configs[k])
        LOG.debug('Resulting deploy config:')
        LOG.debug('%s', result)
        return result
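
    # A small worked example of the merge above (illustrative only; the keys
    # and values are made up). With the default priority
    # 'instance:node:image:conf' and
    #     configs = {'conf': {'partitions': 'A', 'on_fail_script': 'X'},
    #                'node': {'partitions': 'B'}}
    # the list is walked from lowest to highest priority, so 'node'
    # overwrites 'conf' and the result is
    #     {'partitions': 'B', 'on_fail_script': 'X'}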

    def _fetch_on_fail_script(self, task, url):
        if not url:
            return
        path = get_on_fail_script_path(task.node)
        LOG.info('[{0}] Fetching on_fail_script to:\n{1}'.format(
            task.node.uuid, path))
        resources.url_download(task.context, task.node, url, path)

    def _fetch_actions(self, task):
        driver_actions_url = task.node.instance_info.get('driver_actions')
        actions_data = resources.url_download_json(task.context,
                                                   task.node,
                                                   driver_actions_url)
        if not actions_data:
            LOG.info("[%s] No driver_actions specified" % task.node.uuid)
            return

        controller = actions.ActionController(task, actions_data)
        controller.fetch_action_resources()

        actions_data = controller.to_dict()
        LOG.info('[{0}] Deploy actions for the node are:\n{1}'.format(
            task.node.uuid, actions_data))

        filename = get_actions_json_path(task.node)
        LOG.info('[{0}] Writing actions.json to:\n{1}'.format(
            task.node.uuid, filename))
        with open(filename, 'w') as f:
            f.write(json.dumps(actions_data))

    def _build_pxe_config_options(self, task):
        """Builds the pxe config options for booting agent.

        This method builds the config options to be replaced on
        the agent pxe config template.

        :param task: a TaskManager instance
        :returns: a dict containing the options to be applied on
            the agent pxe config template.
        """
        agent_config_opts = {
            'deployment_id': task.node.uuid,
            'ironic_api_url': deploy_utils.get_ironic_api_url(),
        }

        return agent_config_opts

    def _get_image_resource_mode(self):
        raise NotImplementedError

    def _get_deploy_driver(self):
        raise NotImplementedError

    def _add_image_deployment_config(self, task, provision_config):
        node = task.node
        bareon_utils.change_node_dict(
            node, 'instance_info',
            {'deploy_driver': self._get_deploy_driver()})
        node.save()

        image_resource_mode = self._get_image_resource_mode()
        boot_image = node.instance_info['image_source']
        default_user_images = [
            {
                'name': boot_image,
                'url': boot_image,
                'target': "/",
            }
        ]
        user_images = provision_config.get('images', default_user_images)

        invalid_images = []
        origin_names = [None] * len(user_images)
        for idx, image in enumerate(user_images):
            try:
                bareon_utils.validate_json(('name', 'url'), image)
            except exception.MissingParameterValue as e:
                invalid_images.append(
                    'Invalid "image" record - there is no key {key} (#{idx}: '
                    '{payload})'.format(
                        key=e, idx=idx, payload=json.dumps(image)))
                continue

            origin_names[idx] = image['name']
            image_uuid, image_name = image_service.get_glance_image_uuid_name(
                task, image['url'])
            image['boot'] = (boot_image == image_uuid)
            image['url'] = "glance:%s" % image_uuid
            image['mode'] = image_resource_mode
            image['image_uuid'] = image_uuid
            image['image_name'] = image_name

        if invalid_images:
            raise exception.InvalidParameterValue(
                err='\n'.join(invalid_images))

        fetched_image_resources = self._fetch_images(task, user_images)

        image_deployment_config = [
            {
                # Grab the name from the source data to keep it untouched,
                # because the "resources" subsystem replaces all
                # non-alphanumeric symbols in the 'name' field with
                # underscores.
                'name': name,
                'image_pull_url': image.pull_url,
                'target': image.target,
                'boot': image.boot,
                'image_uuid': image.image_uuid,
                'image_name': image.image_name
            }
            for name, image in zip(origin_names, fetched_image_resources)
        ]

        bareon_utils.change_node_dict(
            task.node, 'instance_info',
            {'multiboot': len(image_deployment_config) > 1})
        node.save()

        provision_config['images'] = image_deployment_config
        return provision_config

    def _fetch_images(self, task, image_resources):
        images = resources.ResourceList({
            "name": "tenant_images",
            "resources": image_resources
        }, task)
        images.fetch_resources()

        # NOTE(lobur): serialize tenant images json for further cleanup.
        images_json = images.to_dict()
        with open(get_tenant_images_json_path(task.node), 'w') as f:
            f.write(json.dumps(images_json))

        return images.resources

    def terminate_deployment(self, task):
        node = task.node
        if TERMINATE_FLAG not in node.instance_info:

            def _wait_for_node_to_become_terminated(retries, max_retries,
                                                    task):
                task_node = task.node
                retries[0] += 1
                if retries[0] > max_retries:
                    bareon_utils.change_node_dict(
                        task_node, 'instance_info',
                        {TERMINATE_FLAG: 'failed'})
                    task_node.reservation = None
                    task_node.save()

                    raise bareon_exception.RetriesException(
                        retry_count=max_retries)

                current_node = db_node.Node.get_by_uuid(task.context,
                                                        task_node.uuid)
                if current_node.instance_info.get(TERMINATE_FLAG) == 'done':
                    raise loopingcall.LoopingCallDone()

            bareon_utils.change_node_dict(
                node, 'instance_info',
                {TERMINATE_FLAG: 'requested'})
            node.save()

            retries = [0]
            interval = CONF.bareon.check_terminate_interval
            max_retries = CONF.bareon.check_terminate_max_retries

            timer = loopingcall.FixedIntervalLoopingCall(
                _wait_for_node_to_become_terminated,
                retries, max_retries, task)
            try:
                timer.start(interval=interval).wait()
            except bareon_exception.RetriesException as ex:
                LOG.error('Failed to terminate node. Error: %(error)s' % {
                    'error': ex})

    @property
    def can_terminate_deployment(self):
        return True

    def _get_deployment_config_validator(self, driver_name):
        try:
            validator = self._deployment_config_validators[driver_name]
        except KeyError:
            validator = DeploymentConfigValidator(driver_name)
            self._deployment_config_validators[driver_name] = validator
        return validator


class BareonVendor(base.VendorInterface):
    def get_properties(self):
        """Return the properties of the interface.

        :returns: dictionary of <property name>:<property description> entries.
        """
        return COMMON_PROPERTIES

    def validate(self, task, method=None, **kwargs):
        """Validate the driver-specific Node deployment info.

        :param task: a TaskManager instance
        :param method: method to be validated
        """
        if method in ('exec_actions', 'deploy_steps'):
            return

        if method == 'switch_boot':
            self.validate_switch_boot(task, **kwargs)
            return

        if not kwargs.get('address'):
            raise exception.MissingParameterValue(_('Bareon must pass '
                                                    'address of a node.'))
        _NodeDriverInfoAdapter(task.node)

    def validate_switch_boot(self, task, **kwargs):
        if not kwargs.get('image'):
            raise exception.MissingParameterValue(_('No image info passed.'))
        if not kwargs.get('ssh_key'):
            raise exception.MissingParameterValue(_('No ssh key info passed.'))
        if not kwargs.get('ssh_user'):
            raise exception.MissingParameterValue(_('No ssh user info '
                                                    'passed.'))

    @base.passthru(['GET', 'POST'], async=False)
    def deploy_steps(self, task, **data):
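        """Serve the "deploy steps" exchange with the bareon ramdisk.

        A GET returns the next step for the agent; currently the only step
        is the SSH key injection request built by _InjectSSHKeyStepRequest,
        e.g. (a sketch; the actual key material comes from driver_info):

            {'name': 'inject-ssh-keys',
             'payload': {'ssh-keys': {'root': ['<public key>']}}}

        A POST reports the result of a step back; the body is expected to
        carry 'name', 'status', 'status-details' and 'payload' keys (see
        _DeployStepsAdapter) and is answered with {'url': None}.
        """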
        http_method = data.pop('http_method')
        driver_info = _NodeDriverInfoAdapter(task.node)

        if http_method == 'GET':
            ssh_keys_step = _InjectSSHKeyStepRequest(task, driver_info)
            return ssh_keys_step()

        steps_mapping = _DeployStepMapping()
        data = _DeployStepsAdapter(data)
        try:
            request_cls = steps_mapping.name_to_step[data.action]
        except KeyError:
            if data.action is not None:
                raise RuntimeError(
                    'There is no name mapping for deployment step: '
                    '{!r}'.format(data.action))

            message = (
                'Bareon\'s callback service has failed with an internal error')
            if data.status_details:
                message += '\nFailure details: {}'.format(
                    pprint.pformat(data.status_details))
            # TODO(dbogun): add support for existing log extraction mechanism
            deploy_utils.set_failed_state(
                task, message, collect_logs=False)
        else:
            handler = request_cls.result_handler(
                task, driver_info, data)
            handler()

        return {'url': None}

    @base.passthru(['POST'])
    @task_manager.require_exclusive_lock
    def pass_deploy_info(self, task, **kwargs):
        """Continues the deployment of baremetal node."""
        node = task.node
        task.process_event('resume')

        driver_info = _NodeDriverInfoAdapter(task.node)

        cmd = '{} --data_driver "{}" --deploy_driver "{}"'.format(
            driver_info.entry_point, bareon_utils.node_data_driver(node),
            node.instance_info['deploy_driver'])
        if CONF.debug:
            cmd += ' --debug'
        instance_info = node.instance_info

        connect_args = {
            'username': driver_info.ssh_login,
            'key_filename': driver_info.ssh_key,
            'host': kwargs['address']}
        if driver_info.ssh_port:
            connect_args['port'] = driver_info.ssh_port

        try:
            ssh = bareon_utils.get_ssh_connection(task, **connect_args)
            sftp = ssh.open_sftp()

            self._check_bareon_version(ssh, node.uuid)

            provision_config_path = get_provision_json_path(task.node)
            # TODO(yuriyz) no hardcode
            sftp.put(provision_config_path, '/tmp/provision.json')

            # swift configdrive store should be disabled
            configdrive = instance_info.get('configdrive')
            if configdrive is not None:
                # TODO(yuriyz) no hardcode
                bareon_utils.sftp_write_to(sftp, configdrive,
                                           '/tmp/config-drive.img')

            out, err = self._deploy(task, ssh, cmd, **connect_args)
            LOG.info(_LI('[%(node)s] Bareon pass on node %(node)s'),
                     {'node': node.uuid})
            LOG.debug('[%s] Bareon stdout is: "%s"', node.uuid, out)
            LOG.debug('[%s] Bareon stderr is: "%s"', node.uuid, err)

            self._get_boot_info(task, ssh)

            self._run_actions(task, ssh, sftp, connect_args)

            manager_utils.node_power_action(task, states.POWER_OFF)
            manager_utils.node_set_boot_device(task, boot_devices.DISK,
                                               persistent=True)
            manager_utils.node_power_action(task, states.POWER_ON)

        except exception.SSHConnectFailed as e:
            msg = (
                _('[%(node)s] SSH connect to node %(host)s failed. '
                  'Error: %(error)s') % {'host': connect_args['host'],
                                         'error': e, 'node': node.uuid})
            self._deploy_failed(task, msg)

        except exception.ConfigInvalid as e:
            msg = (_('[%(node)s] Invalid provision config. '
                     'Error: %(error)s') % {'error': e, 'node': node.uuid})
            self._deploy_failed(task, msg)

        except bareon_exception.DeployTerminationSucceed:
            LOG.info(_LI('[%(node)s] Deployment was terminated'),
                     {'node': node.uuid})

        except Exception as e:
            self._run_on_fail_script(task, sftp, ssh)

            msg = (_('[%(node)s] Deploy failed for node %(node)s. '
                     'Error: %(error)s') % {'node': node.uuid, 'error': e})
            self._bareon_log(task, ssh)
            self._deploy_failed(task, msg)

        else:
            task.process_event('done')
            LOG.info(_LI('Deployment to node %s done'), task.node.uuid)

        finally:
            self._clean_up_deployment_resources(task)

    def _deploy_failed(self, task, msg):
        LOG.error(msg)
        deploy_utils.set_failed_state(task, msg, collect_logs=False)

    def _check_bareon_version(self, ssh, node_uuid):
        try:
            stdout, stderr = processutils.ssh_execute(
                ssh, 'cat /etc/bareon-release')

            LOG.info(_LI("[{0}] Tracing Bareon version.\n{1}").format(
                node_uuid, stdout))

            version = ""
            lines = stdout.splitlines()
            if lines:
                version_line = lines[0]
                name, _, version = version_line.partition("==")
                if version.startswith(REQUIRED_BAREON_VERSION):
                    return

            msg = ("Bareon version '%(req)s' is required, but version "
                   "'%(found)s' found on the ramdisk."
                   % dict(req=REQUIRED_BAREON_VERSION,
                          found=version))
            raise bareon_exception.IncompatibleRamdiskVersion(details=msg)
        except processutils.ProcessExecutionError:
            msg = "Bareon version cannot be read on the ramdisk."
            raise bareon_exception.IncompatibleRamdiskVersion(details=msg)

    def _get_boot_info(self, task, ssh):
        node = task.node
        node_uuid = node.uuid

        if not node.instance_info.get('multiboot', False):
            return
        try:
            stdout, stderr = processutils.ssh_execute(
                ssh, 'cat /tmp/boot_entries.json')
        except processutils.ProcessExecutionError as exec_err:
            LOG.warning(_LI('[%(node)s] Error getting boot info. '
                            'Error: %(error)s') % {'node': node_uuid,
                                                   'error': exec_err})
            raise
        else:
            multiboot_info = json.loads(stdout)
            bareon_utils.change_node_dict(node, 'instance_info', {
                'multiboot_info': multiboot_info
            })
            LOG.info("[{1}] {0} Multiboot info {0}\n{2}"
                     "\n".format("#" * 20, node_uuid, multiboot_info))

    def _run_actions(self, task, ssh, sftp, sshparams):
        actions_path = get_actions_json_path(task.node)
        if not os.path.exists(actions_path):
            LOG.info(_LI("[%(node)s] No actions specified. Skipping")
                     % {'node': task.node.uuid})
            return

        with open(actions_path) as f:
            actions_data = json.loads(f.read())
        actions_controller = actions.ActionController(
            task, actions_data
        )

        actions_controller.execute(ssh, sftp, **sshparams)

    def _bareon_log(self, task, ssh):
        node_uuid = task.node.uuid
        try:
            # TODO(oberezovskyi): Change the log pulling mechanism (e.g. use
            # the remote logging feature of syslog)
            stdout, stderr = processutils.ssh_execute(
                ssh, 'cat /var/log/bareon.log')
        except processutils.ProcessExecutionError as exec_err:
            LOG.warning(_LI('[%(node)s] Error getting Bareon log. '
                            'Error: %(error)s') % {'node': node_uuid,
                                                   'error': exec_err})
        else:
            LOG.info("[{1}] {0} Start Bareon log {0}\n{2}\n"
                     "[{1}] {0} End Bareon log {0}".format("#" * 20,
                                                           node_uuid,
                                                           stdout))

    def _run_on_fail_script(self, task, sftp, ssh):
        node = task.node
        node_uuid = node.uuid
        try:
            on_fail_script_path = get_on_fail_script_path(node)
            if not os.path.exists(on_fail_script_path):
                LOG.info(_LI("[%(node)s] No on_fail_script passed. Skipping")
                         % {'node': node_uuid})
                return

            LOG.debug(_LI('[%(node)s] Uploading on_fail script to the node.'),
                      {'node': node_uuid})
            sftp.put(on_fail_script_path, '/tmp/bareon_on_fail.sh')

            LOG.debug("[%(node)s] Executing on_fail_script."
                      % {'node': node_uuid})
            out, err = processutils.ssh_execute(
                ssh, "bash %s" % '/tmp/bareon_on_fail.sh')

        except processutils.ProcessExecutionError as ex:
            LOG.warning(_LI('[%(node)s] Error executing OnFail script. '
                            'Error: %(er)s') % {'node': node_uuid, 'er': ex})

        except exception.SSHConnectFailed as ex:
            LOG.warning(_LI('[%(node)s] SSH connection error. '
                            'Error: %(er)s') % {'node': node_uuid, 'er': ex})

        except Exception as ex:
            LOG.warning(_LI('[%(node)s] Unknown error. '
                            'Error: %(error)s') % {'node': node_uuid,
                                                   'error': ex})
        else:
            LOG.info(
                "{0} [{1}] on_fail script result below {0}".format("#" * 40,
                                                                   node_uuid))
            LOG.info(out)
            LOG.info(err)
            LOG.info("{0} [{1}] End on_fail script "
                     "result {0}".format("#" * 40, node_uuid))

    def _clean_up_deployment_resources(self, task):
        _clean_up_images(task)
        self._clean_up_actions(task)

    def _clean_up_actions(self, task):
        filename = get_actions_json_path(task.node)
        if not os.path.exists(filename):
            return

        with open(filename) as f:
            actions_data = json.loads(f.read())

        controller = actions.ActionController(task, actions_data)
        controller.cleanup_action_resources()

    @base.passthru(['POST'])
    @task_manager.require_exclusive_lock
    def exec_actions(self, task, **kwargs):
        actions_json = resources.url_download_json(
            task.context, task.node, kwargs.get('driver_actions'))
        if not actions_json:
            LOG.info("[%s] No driver_actions specified." % task.node.uuid)
            return

        ssh_user = actions_json.pop('action_user')
        ssh_key_url = actions_json.pop('action_key')
        node_ip = bareon_utils.get_node_ip(task)

        controller = actions.ActionController(task, actions_json)
        controller.ssh_and_execute(node_ip, ssh_user, ssh_key_url)

    def _execute_deploy_script(self, task, ssh, cmd, *args, **kwargs):
        # NOTE(oberezovskyi): minutes to seconds
        timeout = CONF.bareon.deploy_timeout * 60
        LOG.debug('[%s] Running cmd (SSH): %s', task.node.uuid, cmd)
        try:
            out, err = bareon_utils.ssh_execute(ssh, cmd, timeout=timeout,
                                                check_exit_code=True)
        except exception.SSHCommandFailed as err:
            LOG.debug('[%s] Deploy script execute failed: "%s"',
                      task.node.uuid, err)
            raise bareon_exception.DeploymentTimeout(timeout=timeout)
        return out, err

    def _deploy(self, task, ssh, cmd, **params):
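        # Run the deploy script in a background greenthread and poll every
        # few seconds: finish when the thread exits, or kill it if another
        # conductor thread has set instance_info[TERMINATE_FLAG] to
        # 'requested' (see terminate_deployment), acknowledging with 'done'.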
        deployment_thread = eventlet.spawn(self._execute_deploy_script,
                                           task, ssh, cmd, **params)

        def _wait_for_deployment_finished(task, thread):
            task_node = task.node
            current_node = db_node.Node.get_by_uuid(task.context,
                                                    task_node.uuid)

            # NOTE(oberezovskyi): greenthreads have no way to check whether
            # the thread has already finished, so we need to access a
            # private variable.
            if thread._exit_event.ready():
                raise loopingcall.LoopingCallDone()

            if (current_node.instance_info.get(TERMINATE_FLAG,
                                               '') == 'requested'):
                thread.kill()
                bareon_utils.change_node_dict(
                    task_node, 'instance_info',
                    {TERMINATE_FLAG: 'done'})
                task_node.save()
                raise bareon_exception.DeployTerminationSucceed()

        timer = loopingcall.FixedIntervalLoopingCall(
            _wait_for_deployment_finished, task, deployment_thread)
        timer.start(interval=5).wait()
        return deployment_thread.wait()

    @base.passthru(['POST'], async=False)
    @task_manager.require_exclusive_lock
    def switch_boot(self, task, **kwargs):
        # NOTE(oberezovskyi): exception messages should not be changed
        # because of hardcode in the nova-ironic driver.
        image = kwargs.get('image')
        LOG.info('[{0}] Attempt to switch boot to {1} '
                 'image'.format(task.node.uuid, image))

        msg = ""
        try:
            if not task.node.instance_info.get('multiboot', False):
                msg = "[{}] Non-multiboot deployment".format(task.node.uuid)
                raise exception.IronicException(message=msg, code=400)

            boot_info = task.node.instance_info.get('multiboot_info',
                                                    {'elements': []})

            grub_id = next((element['grub_id']
                            for element in boot_info['elements']
                            if (element['image_uuid'] == image or
                                element['image_name'] == image)), None)

            if grub_id is None:
                msg = ('[{}] Can\'t find desired multiboot '
                       'image'.format(task.node.uuid))
                raise exception.IronicException(message=msg, code=400)

            elif grub_id == boot_info.get('current_element', None):
                msg = ('[{}] Already in desired boot '
                       'device.'.format(task.node.uuid))
                raise exception.IronicException(message=msg, code=400)

            node_ip = bareon_utils.get_node_ip(task)
            ssh_key = resources.url_download_raw_secured(task.context,
                                                         task.node,
                                                         kwargs['ssh_key'])
            ssh = bareon_utils.get_ssh_connection(task, **{
                'host': node_ip,
                'username': kwargs['ssh_user'],
                'key_contents': ssh_key
            })

            tmp_path = processutils.ssh_execute(ssh, 'mktemp -d')[0].split()[0]
            cfg_path = os.path.join(tmp_path, 'boot', 'grub2', 'grub.cfg')
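
            # Mount the multiboot partition by UUID, point the "set default="
            # line in grub.cfg at the chosen entry, then unmount and remove
            # the temporary mount point.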
            commands = [
                'mount /dev/disk/by-uuid/{} {}'.format(
                    boot_info['multiboot_partition'],
                    tmp_path),
                "sed -i 's/\(set default=\)[0-9]*/\\1{}/' {}".format(grub_id,
                                                                     cfg_path),
                'umount {}'.format(tmp_path),
                'rmdir {}'.format(tmp_path)
            ]

            map(lambda cmd: processutils.ssh_execute(ssh, 'sudo ' + cmd),
                commands)

        except exception.SSHConnectFailed as e:
            msg = (
                _('[%(node)s] SSH connect to node %(host)s failed. '
                  'Error: %(error)s') % {'host': node_ip, 'error': e,
                                         'node': task.node.uuid})
            raise exception.IronicException(message=msg, code=400)

        except exception.IronicException as e:
            msg = str(e)
            raise

        except Exception as e:
            msg = (_('[%(node)s] Multiboot switch failed for node %(node)s. '
                     'Error: %(error)s') % {'node': task.node.uuid,
                                            'error': e})
            raise exception.IronicException(message=msg, code=400)

        else:
            boot_info['current_element'] = grub_id
            bareon_utils.change_node_dict(
                task.node, 'instance_info',
                {'multiboot_info': boot_info})
            task.node.save()

        finally:
            if msg:
                LOG.error(msg)
                task.node.last_error = msg
                task.node.save()


class DeploymentConfigValidator(object):
    _driver = None
    _namespace = 'bareon.drivers.data'
    _min_version = pkg_resources.parse_version('0.0.2')

    def __init__(self, driver_name):
        self.driver_name = driver_name

        LOG.debug('Loading bareon data-driver "%s"', self.driver_name)
        try:
            manager = stevedore.driver.DriverManager(
                self._namespace, self.driver_name, verify_requirements=True)
            extension = manager[driver_name]
            version = extension.entry_point.dist.version
            version = pkg_resources.parse_version(version)
            LOG.info('Driver %s-%s loaded', extension.name, version)

            if version < self._min_version:
                raise RuntimeError(
                    'bareon version less than {} does not support '
                    'deployment config validation'.format(self._min_version))
        except RuntimeError as e:
            LOG.warning(
                'Failed to load bareon data-driver "%s": %s',
                self.driver_name, e)
            return

        self._driver = manager.driver

    def __call__(self, deployment_config):
        if self._driver is None:
            LOG.info(
                'Skipping deployment config validation due to a problem in '
                'loading the bareon data driver')
            return

        try:
            with open(deployment_config, 'rt') as stream:
                payload = json.load(stream)
            self._driver.validate_data(payload)
        except (IOError, ValueError, TypeError) as e:
            raise exception.InvalidParameterValue(
                'Unable to load deployment config "{}": {}'.format(
                    deployment_config, e))
        except self._driver.exc.WrongInputDataError as e:
            raise exception.InvalidParameterValue(
                'Deployment config has failed validation.\n'
                '{0.message}'.format(e))


def get_provision_json_path(node):
    return os.path.join(resources.get_node_resources_dir(node),
                        "provision.json")


def get_actions_json_path(node):
    return os.path.join(resources.get_node_resources_dir(node),
                        "actions.json")


def get_on_fail_script_path(node):
    return os.path.join(resources.get_node_resources_dir(node),
                        "on_fail_script.sh")


def get_tenant_images_json_path(node):
    return os.path.join(resources.get_node_resources_dir(node),
                        "tenant_images.json")


class _AbstractAdapter(object):
    def __init__(self, data):
        self._raw = data

    def _extract_fields(self, mapping):
        for attr, name in mapping:
            try:
                value = self._raw[name]
            except KeyError:
                continue
            setattr(self, attr, value)


class _DeployStepsAdapter(_AbstractAdapter):
    action = action_payload = None
    status = status_details = None

    def __init__(self, data):
        super(_DeployStepsAdapter, self).__init__(data)

        self._extract_fields({
            'action': 'name',
            'status': 'status'}.items())
        self.action_payload = self._raw.get('payload', {})
        self.status_details = self._raw.get('status-details', '')


# TODO(dbogun): handle all driver_info keys
class _NodeDriverInfoAdapter(_AbstractAdapter):
    _exc_prefix = 'driver_info: '

    ssh_port = None
    # TODO(dbogun): check for an API way to define access defaults
    ssh_login = 'root'
    ssh_key = '/etc/ironic/bareon_key'
    ssh_key_pub = None
    entry_point = 'bareon-provision'

    def __init__(self, node):
        super(_NodeDriverInfoAdapter, self).__init__(node.driver_info)
        self.node = node

        self._extract_fields({
            'ssh_port': 'bareon_ssh_port',
            'ssh_key': 'bareon_key_filename',
            'ssh_key_pub': 'bareon_public_key_filename',
            'ssh_login': 'bareon_username',
            'entry_point': 'bareon_deploy_script'}.items())
        self._process()
        self._validate()

    def _process(self):
        if self.ssh_key_pub is None:
            self.ssh_key_pub = '{}.pub'.format(self.ssh_key)

        if self.ssh_port is not None:
            self.ssh_port = int(self.ssh_port)
            if not 0 < self.ssh_port < 65536:
                raise exception.InvalidParameterValue(
                    '{}Invalid SSH port number({}) is outside of allowed '
                    'range.'.format(self._exc_prefix, 'bareon_ssh_port'))

    def _validate(self):
        self._validate_ssh_key()

    def _validate_ssh_key(self):
        missing = []
        pkey_stats = None
        for idx, target in enumerate((self.ssh_key, self.ssh_key_pub)):
            try:
                target_stat = os.stat(target)
                if not idx:
                    pkey_stats = target_stat
            except OSError as e:
                missing.append(e)

        missing = ['{0.filename}: {0.strerror}'.format(x) for x in missing]
        if missing:
            raise exception.InvalidParameterValue(
                '{}Unable to use SSH key:\n{}'.format(
                    self._exc_prefix, '\n'.join(missing)))

        issue = None
        if not stat.S_ISREG(pkey_stats.st_mode):
            issue = 'SSH private key {!r} is not a regular file.'.format(
                self.ssh_key)
        if pkey_stats.st_mode & 0o177:
            issue = 'Permissions {} for {!r} are too open.'.format(
                oct(pkey_stats.st_mode & 0o777), self.ssh_key)

        if issue:
            raise exception.InvalidParameterValue(issue)


@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepHandler(object):
    def __init__(self, task, driver_info):
        self.task = task
        self.driver_info = driver_info

    @abc.abstractmethod
    def __call__(self):
        pass


@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepResult(_AbstractDeployStepHandler):
    def __init__(self, task, driver_info, step_info):
        super(_AbstractDeployStepResult, self).__init__(task, driver_info)
        self.step_info = step_info

    def __call__(self):
        if not self.step_info.status:
            self._handle_error()
            return

        return self._handle()

    @abc.abstractmethod
    def _handle(self):
        pass

    def _handle_error(self):
        message = 'Deployment step "{}" has failed: {}'.format(
            self.step_info.action, self.step_info.status_details)
        # TODO(dbogun): add support for existing log extraction mechanism
        deploy_utils.set_failed_state(self.task, message, collect_logs=False)


@six.add_metaclass(abc.ABCMeta)
class _AbstractDeployStepRequest(_AbstractDeployStepHandler):
    @abc.abstractproperty
    def name(self):
        pass

    @abc.abstractproperty
    def result_handler(self):
        pass

    def __call__(self):
        payload = self._handle()
        return {
            'name': self.name,
            'payload': payload}

    @abc.abstractmethod
    def _handle(self):
        pass


class _InjectSSHKeyStepResult(_AbstractDeployStepResult):
    def _handle(self):
        pass


class _InjectSSHKeyStepRequest(_AbstractDeployStepRequest):
    name = 'inject-ssh-keys'
    result_handler = _InjectSSHKeyStepResult

    def _handle(self):
        try:
            with open(self.driver_info.ssh_key_pub) as data:
                ssh_key = data.read()
        except IOError as e:
            raise bareon_exception.DeployTaskError(
                name=type(self).__name__, details=e)

        return {
            'ssh-keys': {
                self.driver_info.ssh_login: [ssh_key]}}


class _DeployStepMapping(object):
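    """Registry of the available deploy step request classes.

    The mapping is built by reflection: every subclass of
    _AbstractDeployStepRequest defined in this module (other than the base
    class itself) is collected and indexed by its `name` attribute, so
    adding a new step only requires defining a new request/result class pair.
    """
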
    def __init__(self):
        self.steps = []

        base_cls = _AbstractDeployStepRequest
        target = sys.modules[__name__]
        for name in dir(target):
            value = getattr(target, name)
            if (inspect.isclass(value)
                    and issubclass(value, base_cls)
                    and value is not base_cls):
                self.steps.append(value)

        self.name_to_step = {}
        self.step_to_name = {}
        for task in self.steps:
            self.name_to_step[task.name] = task
            self.step_to_name[task] = task.name