DRYD-30 Support node tags and kernel parameters
- Create MaaS API model for Tags
- Update YAML schema to add 'platform' section to HostProfile/BaremetalNode
- Update YAML ingester to support the platform section
- Add node_filter support to the /tasks API
This commit is contained in:
parent
6facad6c06
commit
e937282626
@ -37,6 +37,7 @@ class TasksResource(StatefulResource):
|
||||
sitename = json_data.get('sitename', None)
|
||||
design_id = json_data.get('design_id', None)
|
||||
action = json_data.get('action', None)
|
||||
node_filter = json_data.get('node_filter', None)
|
||||
|
||||
if sitename is None or design_id is None or action is None:
|
||||
self.info(req.context, "Task creation requires fields sitename, design_id, action")
|
||||
@ -44,7 +45,7 @@ class TasksResource(StatefulResource):
|
||||
return
|
||||
|
||||
task = self.orchestrator.create_task(obj_task.OrchestratorTask, site=sitename,
|
||||
design_id=design_id, action=action)
|
||||
design_id=design_id, action=action, node_filter=node_filter)
|
||||
|
||||
task_thread = threading.Thread(target=self.orchestrator.execute_task, args=[task.get_id()])
|
||||
task_thread.start()
|
||||
|
@ -31,6 +31,7 @@ import drydock_provisioner.drivers.node.maasdriver.models.fabric as maas_fabric
|
||||
import drydock_provisioner.drivers.node.maasdriver.models.vlan as maas_vlan
|
||||
import drydock_provisioner.drivers.node.maasdriver.models.subnet as maas_subnet
|
||||
import drydock_provisioner.drivers.node.maasdriver.models.machine as maas_machine
|
||||
import drydock_provisioner.drivers.node.maasdriver.models.tag as maas_tag
|
||||
|
||||
class MaasNodeDriver(NodeDriver):
|
||||
maasdriver_options = [
|
||||
@ -364,7 +365,76 @@ class MaasNodeDriver(NodeDriver):
|
||||
status=hd_fields.TaskStatus.Complete,
|
||||
result=result,
|
||||
result_detail=result_detail)
|
||||
elif task.action ==hd_fields.OrchestratorAction.DeployNode:
|
||||
elif task.action == hd_fields.OrchestratorAction.ApplyNodePlatform:
|
||||
self.orchestrator.task_field_update(task.get_id(),
|
||||
status=hd_fields.TaskStatus.Running)
|
||||
|
||||
self.logger.debug("Starting subtask to configure the platform on %s nodes." % (len(task.node_list)))
|
||||
|
||||
subtasks = []
|
||||
|
||||
result_detail = {
|
||||
'detail': [],
|
||||
'failed_nodes': [],
|
||||
'successful_nodes': [],
|
||||
}
|
||||
|
||||
for n in task.node_list:
|
||||
subtask = self.orchestrator.create_task(task_model.DriverTask,
|
||||
parent_task_id=task.get_id(), design_id=design_id,
|
||||
action=hd_fields.OrchestratorAction.ApplyNodePlatform,
|
||||
site_name=task.site_name,
|
||||
task_scope={'site': task.site_name, 'node_names': [n]})
|
||||
runner = MaasTaskRunner(state_manager=self.state_manager,
|
||||
orchestrator=self.orchestrator,
|
||||
task_id=subtask.get_id())
|
||||
|
||||
self.logger.info("Starting thread for task %s to config node %s platform" % (subtask.get_id(), n))
|
||||
|
||||
runner.start()
|
||||
subtasks.append(subtask.get_id())
|
||||
|
||||
running_subtasks = len(subtasks)
|
||||
attempts = 0
|
||||
worked = failed = False
|
||||
|
||||
while running_subtasks > 0 and attempts < drydock_provisioner.conf.timeouts.apply_node_platform:
|
||||
for t in subtasks:
|
||||
subtask = self.state_manager.get_task(t)
|
||||
|
||||
if subtask.status == hd_fields.TaskStatus.Complete:
|
||||
self.logger.info("Task %s to configure node %s platform complete - status %s" %
|
||||
(subtask.get_id(), n, subtask.get_result()))
|
||||
running_subtasks = running_subtasks - 1
|
||||
|
||||
if subtask.result == hd_fields.ActionResult.Success:
|
||||
result_detail['successful_nodes'].extend(subtask.node_list)
|
||||
worked = True
|
||||
elif subtask.result == hd_fields.ActionResult.Failure:
|
||||
result_detail['failed_nodes'].extend(subtask.node_list)
|
||||
failed = True
|
||||
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
|
||||
worked = failed = True
|
||||
|
||||
time.sleep(1 * 60)
|
||||
attempts = attempts + 1
|
||||
|
||||
if running_subtasks > 0:
|
||||
self.logger.warning("Time out for task %s before all subtask threads complete" % (task.get_id()))
|
||||
result = hd_fields.ActionResult.DependentFailure
|
||||
result_detail['detail'].append('Some subtasks did not complete before the timeout threshold')
|
||||
elif worked and failed:
|
||||
result = hd_fields.ActionResult.PartialSuccess
|
||||
elif worked:
|
||||
result = hd_fields.ActionResult.Success
|
||||
else:
|
||||
result = hd_fields.ActionResult.Failure
|
||||
|
||||
self.orchestrator.task_field_update(task.get_id(),
|
||||
status=hd_fields.TaskStatus.Complete,
|
||||
result=result,
|
||||
result_detail=result_detail)
|
||||
elif task.action == hd_fields.OrchestratorAction.DeployNode:
|
||||
self.orchestrator.task_field_update(task.get_id(),
|
||||
status=hd_fields.TaskStatus.Running)
|
||||
|
||||
@ -922,6 +992,114 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
|
||||
else:
|
||||
final_result = hd_fields.ActionResult.Success
|
||||
|
||||
self.orchestrator.task_field_update(self.task.get_id(),
|
||||
status=hd_fields.TaskStatus.Complete,
|
||||
result=final_result,
|
||||
result_detail=result_detail)
|
||||
elif task_action == hd_fields.OrchestratorAction.ApplyNodePlatform:
|
||||
try:
|
||||
machine_list = maas_machine.Machines(self.maas_client)
|
||||
machine_list.refresh()
|
||||
|
||||
tag_list = maas_tag.Tags(self.maas_client)
|
||||
tag_list.refresh()
|
||||
except Exception as ex:
|
||||
self.logger.error("Error deploying node, cannot access MaaS: %s" % str(ex))
|
||||
traceback.print_tb(sys.last_traceback)
|
||||
self.orchestrator.task_field_update(self.task.get_id(),
|
||||
status=hd_fields.TaskStatus.Complete,
|
||||
result=hd_fields.ActionResult.Failure,
|
||||
result_detail={'detail': 'Error accessing MaaS API', 'retry': True})
|
||||
return
|
||||
|
||||
nodes = self.task.node_list
|
||||
|
||||
result_detail = {'detail': []}
|
||||
|
||||
worked = failed = False
|
||||
|
||||
for n in nodes:
|
||||
try:
|
||||
self.logger.debug("Locating node %s for platform configuration" % (n))
|
||||
|
||||
node = site_design.get_baremetal_node(n)
|
||||
machine = machine_list.identify_baremetal_node(node, update_name=False)
|
||||
|
||||
if machine is None:
|
||||
self.logger.warning("Could not locate machine for node %s" % n)
|
||||
result_detail['detail'].append("Could not locate machine for node %s" % n)
|
||||
failed = True
|
||||
continue
|
||||
except Exception as ex1:
|
||||
failed = True
|
||||
self.logger.error("Error locating machine for node %s: %s" % (n, str(ex1)))
|
||||
result_detail['detail'].append("Error locating machine for node %s" % (n))
|
||||
continue
|
||||
|
||||
try:
|
||||
# Render the string of all kernel params for the node
|
||||
kp_string = ""
|
||||
|
||||
for k,v in getattr(node, 'kernel_params', {}).items():
|
||||
if v == 'True':
|
||||
kp_string = kp_string + " %s" % (k)
|
||||
else:
|
||||
kp_string = kp_string + " %s=%s" % (k, v)
|
||||
|
||||
if kp_string:
|
||||
# Check if the node has an existing kernel params tag
|
||||
node_kp_tag = tag_list.select("%s_kp" % (node.name))
|
||||
self.logger.info("Configuring kernel parameters for node %s" % (node.name))
|
||||
|
||||
if node_kp_tag is None:
|
||||
self.logger.debug("Creating kernel_params tag for node %s: %s" % (node.name, kp_string))
|
||||
node_kp_tag = maas_tag.Tag(self.maas_client, name="%s_kp" % (node.name), kernel_opts=kp_string)
|
||||
node_kp_tag = tag_list.add(node_kp_tag)
|
||||
node_kp_tag.apply_to_node(machine.resource_id)
|
||||
else:
|
||||
self.logger.debug("Updating tag %s for node %s: %s" % (node_kp_tag.resource_id, node.name, kp_string))
|
||||
node_kp_tag.kernel_opts = kp_string
|
||||
node_kp_tag.update()
|
||||
|
||||
self.logger.info("Applied kernel parameters to node %s" % n)
|
||||
result_detail['detail'].append("Applied kernel parameters to node %s" % (node.name))
|
||||
worked = True
|
||||
except Exception as ex2:
|
||||
failed = True
|
||||
result_detail['detail'].append("Error configuring kernel parameters for node %s" % (n))
|
||||
self.logger.error("Error configuring kernel parameters for node %s: %s" % (n, str(ex2)))
|
||||
continue
|
||||
|
||||
try:
|
||||
if node.tags is not None and len(node.tags) > 0:
|
||||
self.logger.info("Configuring static tags for node %s" % (node.name))
|
||||
|
||||
for t in node.tags:
|
||||
tag_list.refresh()
|
||||
tag = tag_list.select(t)
|
||||
|
||||
if tag is None:
|
||||
self.logger.debug("Creating static tag %s" % t)
|
||||
tag = maas_tag.Tag(self.maas_client, name=t)
|
||||
tag = tag_list.add(tag)
|
||||
|
||||
self.logger.debug("Applying tag %s to node %s" % (tag.resource_id, machine.resource_id))
|
||||
tag.apply_to_node(machine.resource_id)
|
||||
|
||||
self.logger.info("Applied static tags to node %s" % (node.name))
|
||||
result_detail['detail'].append("Applied static tags to node %s" % (node.name))
|
||||
worked = True
|
||||
except Exception as ex3:
|
||||
failed = True
|
||||
result_detail['detail'].append("Error configuring static tags for node %s" % (node.name))
|
||||
self.logger.error("Error configuring static tags for node %s: %s" % (node.name, str(ex3)))
|
||||
continue
|
||||
|
||||
if failed:
|
||||
final_result = hd_fields.ActionResult.Failure
|
||||
else:
|
||||
final_result = hd_fields.ActionResult.Success
|
||||
|
||||
self.orchestrator.task_field_update(self.task.get_id(),
|
||||
status=hd_fields.TaskStatus.Complete,
|
||||
result=final_result,
|
||||
|
137
drydock_provisioner/drivers/node/maasdriver/models/tag.py
Normal file
137
drydock_provisioner/drivers/node/maasdriver/models/tag.py
Normal file
@ -0,0 +1,137 @@
|
||||
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import drydock_provisioner.error as errors
|
||||
import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
|
||||
|
||||
import yaml
|
||||
|
||||
class Tag(model_base.ResourceBase):
    """Model of a MaaS tag resource.

    MaaS tags are keyed by name rather than a numeric id, so
    ``resource_id`` holds the tag name throughout this model.
    """

    resource_url = 'tags/{resource_id}/'
    # BUGFIX: was misspelled 'defintion', so the tag definition field was
    # silently dropped when parsing a MaaS API response in from_dict().
    fields = ['resource_id', 'name', 'definition', 'kernel_opts']
    json_fields = ['name', 'kernel_opts', 'comment', 'definition']

    def __init__(self, api_client, **kwargs):
        super(Tag, self).__init__(api_client, **kwargs)

    def get_applied_nodes(self):
        """
        Query the list of nodes this tag is currently applied to

        :return: List of MaaS system_ids of nodes
        """

        url = self.interpolate_url()

        resp = self.api_client.get(url, op='nodes')

        if resp.status_code == 200:
            resp_json = resp.json()
            system_id_list = []

            for n in resp_json:
                system_id_list.append(n.get('system_id'))

            return system_id_list
        else:
            self.logger.error("Error retrieving node/tag pairs, received HTTP %s from MaaS" % resp.status_code)
            self.logger.debug("MaaS response: %s" % resp.text)
            raise errors.DriverError("Error retrieving node/tag pairs, received HTTP %s from MaaS" % resp.status_code)

    def apply_to_node(self, system_id):
        """
        Apply this tag to a MaaS node

        Idempotent: if the tag is already applied to the node, this is a
        no-op rather than an error.

        :param system_id: MaaS system_id of the node
        """

        if system_id in self.get_applied_nodes():
            self.logger.debug("Tag %s already applied to node %s" % (self.name, system_id))
        else:
            url = self.interpolate_url()

            resp = self.api_client.post(url, op='update_nodes', files={'add': system_id})

            if not resp.ok:
                self.logger.error("Error applying tag to node, received HTTP %s from MaaS" % resp.status_code)
                self.logger.debug("MaaS response: %s" % resp.text)
                raise errors.DriverError("Error applying tag to node, received HTTP %s from MaaS" % resp.status_code)

    def to_dict(self):
        """
        Serialize this resource instance into a dict matching the
        MAAS representation of the resource
        """
        data_dict = {}

        # NOTE: json_fields never contains 'resource_id', so the old
        # special-case branch remapping it to 'name' was dead code and
        # has been removed.
        for f in self.json_fields:
            if getattr(self, f, None) is not None:
                data_dict[f] = getattr(self, f)

        return data_dict

    @classmethod
    def from_dict(cls, api_client, obj_dict):
        """
        Create a instance of this resource class based on a dict
        of MaaS type attributes

        Customized for Tag due to use of name instead of id
        as resource key

        :param api_client: Instance of api_client.MaasRequestFactory for accessing MaaS API
        :param obj_dict: Python dict as parsed from MaaS API JSON representing this resource type
        """

        refined_dict = {k: obj_dict.get(k, None) for k in cls.fields}

        if 'name' in obj_dict.keys():
            refined_dict['resource_id'] = obj_dict.get('name')

        i = cls(api_client, **refined_dict)
        return i
|
||||
|
||||
class Tags(model_base.ResourceCollectionBase):
    """Collection model for MaaS tag resources."""

    collection_url = 'tags/'
    collection_resource = Tag

    def __init__(self, api_client, **kwargs):
        super(Tags, self).__init__(api_client)

    def add(self, res):
        """
        Create a new resource in this collection in MaaS

        Customize as Tag resources use 'name' instead of 'id'

        :param res: Instance of cls.collection_resource
        """
        url = self.interpolate_url()

        # POST the serialized tag; MaaS returns the created resource on 200.
        resp = self.api_client.post(url, files=res.to_dict())

        # Guard clause: anything other than 200 is a driver-level failure.
        if resp.status_code != 200:
            raise errors.DriverError("Failed updating MAAS url %s - return code %s"
                                     % (url, resp.status_code))

        # Tags are keyed by name, so the returned 'name' is the resource id.
        res.set_resource_id(resp.json().get('name'))
        return res
|
||||
|
@ -304,6 +304,15 @@ class YamlIngester(IngesterPlugin):
|
||||
|
||||
model.interfaces.append(int_model)
|
||||
|
||||
platform = spec.get('platform', {})
|
||||
|
||||
model.image = platform.get('image', None)
|
||||
model.kernel = platform.get('kernel', None)
|
||||
|
||||
model.kernel_params = {}
|
||||
for k,v in platform.get('kernel_params', {}).items():
|
||||
model.kernel_params[k] = v
|
||||
|
||||
model.primary_network = spec.get('primary_network', None)
|
||||
|
||||
node_metadata = spec.get('metadata', {})
|
||||
|
@ -47,8 +47,9 @@ class HostProfile(base.DrydockPersistentObject, base.DrydockObject):
|
||||
'owner_data': obj_fields.DictOfStringsField(nullable=True),
|
||||
'rack': obj_fields.StringField(nullable=True),
|
||||
'base_os': obj_fields.StringField(nullable=True),
|
||||
'image': obj_fields.StringField(nullable=True),
|
||||
'kernel': obj_fields.StringField(nullable=True),
|
||||
'kernel_params': obj_fields.StringField(nullable=True),
|
||||
'kernel_params': obj_fields.DictOfStringsField(nullable=True),
|
||||
'primary_network': obj_fields.StringField(nullable=False),
|
||||
}
|
||||
|
||||
@ -91,7 +92,7 @@ class HostProfile(base.DrydockPersistentObject, base.DrydockObject):
|
||||
inheritable_field_list = [
|
||||
'hardware_profile', 'oob_type', 'storage_layout',
|
||||
'bootdisk_device', 'bootdisk_root_size', 'bootdisk_boot_size',
|
||||
'rack', 'base_os', 'kernel', 'kernel_params', 'primary_network']
|
||||
'rack', 'base_os', 'image', 'kernel', 'primary_network']
|
||||
|
||||
# Create applied data from self design values and parent
|
||||
# applied values
|
||||
@ -108,6 +109,8 @@ class HostProfile(base.DrydockPersistentObject, base.DrydockObject):
|
||||
|
||||
self.owner_data = objects.Utils.merge_dicts(self.owner_data, parent.owner_data)
|
||||
|
||||
self.kernel_params = objects.Utils.merge_dicts(self.kernel_params, parent.kernel_params)
|
||||
|
||||
self.interfaces = HostInterfaceList.from_basic_list(
|
||||
HostInterface.merge_lists(self.interfaces, parent.interfaces))
|
||||
|
||||
|
@ -467,13 +467,37 @@ class Orchestrator(object):
|
||||
|
||||
|
||||
if len(node_networking_task.result_detail['successful_nodes']) > 0:
|
||||
self.logger.info("Found %s successfully networked nodes, starting deployment." %
|
||||
(len(node_networking_task.result_detail['successful_nodes'])))
|
||||
self.logger.info("Found %s successfully networked nodes, configuring platform." %
|
||||
(len(node_networking_task.result_detail['successful_nodes'])))
|
||||
|
||||
node_platform_task = self.create_task(tasks.DriverTask,
|
||||
parent_task_id=task.get_id(), design_id=design_id,
|
||||
action=hd_fields.OrchestratorAction.ApplyNodePlatform,
|
||||
task_scope={'site': task_site,
|
||||
'node_names': node_networking_task.result_detail['successful_nodes']})
|
||||
self.logger.info("Starting node driver task %s to configure node platform." % (node_platform_task.get_id()))
|
||||
|
||||
node_driver.execute_task(node_platform_task.get_id())
|
||||
|
||||
node_platform_task = self.state_manager.get_task(node_platform_task.get_id())
|
||||
|
||||
if node_platform_task.get_result() in [hd_fields.ActionResult.Success,
|
||||
hd_fields.ActionResult.PartialSuccess]:
|
||||
worked = True
|
||||
elif node_platform_task.get_result() in [hd_fields.ActionResult.Failure,
|
||||
hd_fields.ActionResult.PartialSuccess]:
|
||||
failed = True
|
||||
else:
|
||||
self.logger.warning("No nodes successfully networked, skipping platform configuration subtask")
|
||||
|
||||
if len(node_platform_task.result_detail['successful_nodes']) > 0:
|
||||
self.logger.info("Configured platform on %s nodes, starting deployment." %
|
||||
(len(node_platform_task.result_detail['successful_nodes'])))
|
||||
node_deploy_task = self.create_task(tasks.DriverTask,
|
||||
parent_task_id=task.get_id(), design_id=design_id,
|
||||
action=hd_fields.OrchestratorAction.DeployNode,
|
||||
task_scope={'site': task_site,
|
||||
'node_names': node_networking_task.result_detail['successful_nodes']})
|
||||
'node_names': node_platform_task.result_detail['successful_nodes']})
|
||||
|
||||
self.logger.info("Starting node driver task %s to deploy nodes." % (node_deploy_task.get_id()))
|
||||
node_driver.execute_task(node_deploy_task.get_id())
|
||||
@ -487,7 +511,7 @@ class Orchestrator(object):
|
||||
hd_fields.ActionResult.PartialSuccess]:
|
||||
failed = True
|
||||
else:
|
||||
self.logger.warning("No nodes successfully networked, skipping deploy subtask")
|
||||
self.logger.warning("Unable to configure platform on any nodes, skipping deploy subtask")
|
||||
|
||||
final_result = None
|
||||
if worked and failed:
|
||||
|
@ -224,6 +224,16 @@ spec:
|
||||
networks:
|
||||
- 'mgmt'
|
||||
- 'admin'
|
||||
# Settings for the platform (operating system)
|
||||
platform:
|
||||
# Which image to deploy on the node, must be available in the provisioner. Defaults to 'ubuntu/xenial'
|
||||
image:
|
||||
# Which kernel to enable. Defaults to generic, can also be hwe (hardware enablement)
|
||||
kernel: generic
|
||||
# K/V list of kernel parameters to configure on boot. No default. Use value of true for params that are just flags
|
||||
kernel_params:
|
||||
console: tty1
|
||||
quiet: true
|
||||
# Metadata about the node
|
||||
metadata:
|
||||
# Explicit tags to propagate to Kubernetes. Simple strings of any value
|
||||
|
@ -39,8 +39,10 @@ create_network_template = 2
|
||||
identify_node = 10
|
||||
configure_hardware = 30
|
||||
apply_node_networking = 5
|
||||
apply_node_platform = 5
|
||||
deploy_node = 45
|
||||
|
||||
[maasdriver]
|
||||
maas_api_url = 'http://localhost:8000/MAAS/api/2.0/'
|
||||
maas_api_key = 'your:secret:key'
|
||||
maas_api_key = 'your:secret:key'
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user