#!/usr/bin/env python
#
# Copyright 2014 Huawei Technologies Co. Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""binary to deploy a cluster by compass client api."""
import logging
import os
import re
import requests
import site
import sys
import time


# activate the virtualenv that provides the compass client dependencies.
activate_this = '$PythonHome/bin/activate_this.py'
execfile(activate_this, dict(__file__=activate_this))
site.addsitedir('$PythonHome/lib/python2.6/site-packages')
sys.path.append('$PythonHome')
os.environ['PYTHON_EGG_CACHE'] = '/tmp/.egg'

from compass.apiclient.restful import Client
from compass.utils import flags
from compass.utils import logsetting


flags.add('compass_server',
          help='compass server url',
          default='http://127.0.0.1/api')
flags.add('compass_user_email',
          help='compass user email',
          default='admin@huawei.com')
flags.add('compass_user_password',
          help='compass user password',
          default='admin')
flags.add('switch_ips',
          help='comma separated switch ips',
          default='')
flags.add('switch_credential',
          help='comma separated <credential name>=<credential value>',
          default='version=v2c,community=public')
flags.add('switch_max_retries', type='int',
          help='max retries of poll switch',
          default=5)
flags.add('switch_retry_interval', type='int',
          help='interval to repoll switch',
          default=10)
flags.add_bool('poll_switches',
               help='if the client polls switches',
               default=True)
flags.add('machines',
          help='comma separated mac addresses of machines',
          default='')
flags.add('adapter_os_name',
          help='adapter os name',
          default=r'(?i)centos.*')
flags.add('adapter_target_system',
          help='adapter target system name',
          default='openstack')
flags.add('adapter_flavor',
          help='adapter flavor name',
          default='allinone')
flags.add('cluster_name',
          help='cluster name',
          default='cluster1')
flags.add('credentials',
          help=(
              'comma separated credentials formatted as '
              '<credential name>:<username>=<password>'
          ),
          default=(
              'server:root=root,service:service=service,'
              'console:console=console'
          ))
flags.add('networking',
          help=(
              'semicolon separated network property and its value '
              '<network property name>=<value>'
          ),
          default='')
flags.add('partitions',
          help=(
              'comma separated partitions '
              '<partition name>:<partition type>=<partition value>'
          ),
          default='tmp:percentage=10,var:percentage=20,home:percentage=40')
flags.add('host_roles',
          help=(
              'semicolon separated host roles '
              '<hostname>=<comma separated roles>'
          ),
          default='')
flags.add('deployment_timeout',
          help='deployment timeout in minutes',
          default=60)
flags.add('progress_update_check_interval',
          help='progress update status check interval in seconds',
          default=60)
flags.add('dashboard_role',
          help='dashboard role name',
          default='os-dashboard')
flags.add('dashboard_link_pattern',
          help='dashboard link pattern',
          default=r'(?m)(http://\d+\.\d+\.\d+\.\d+:5000/v2\.0)')
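# Illustrative invocation (not part of the original script): the values below
# are placeholders showing how the flags defined above fit together;
# substitute the switch ips, machine macs and credentials of the target
# environment before running.
#
#   python <this script> \
#       --compass_server=http://127.0.0.1/api \
#       --switch_ips=172.16.1.10 \
#       --switch_credential=version=v2c,community=public \
#       --machines=00:11:22:33:44:55,00:11:22:33:44:66 \
#       --adapter_flavor=allinone \
#       --cluster_name=cluster1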
def _get_client():
    """get apiclient object."""
    return Client(flags.OPTIONS.compass_server)


def _login(client):
    """get apiclient token."""
    status, token = client.login(
        flags.OPTIONS.compass_user_email,
        flags.OPTIONS.compass_user_password
    )
    logging.info(
        'login status: %s, token: %s',
        status, token
    )
    if status >= 400:
        raise Exception(
            'failed to login %s with user %s' % (
                flags.OPTIONS.compass_server,
                flags.OPTIONS.compass_user_email
            )
        )
    return token


def _get_machines(client):
    """get machines connected to the switch."""
    status, resp = client.list_machines()
    logging.info(
        'get all machines status: %s, resp: %s', status, resp)
    if status >= 400:
        msg = 'failed to get machines'
        raise Exception(msg)

    machines_to_add = set([
        machine for machine in flags.OPTIONS.machines.split(',')
        if machine
    ])
    logging.info('machines to add: %s', list(machines_to_add))
    machines = {}
    for machine in resp:
        mac = machine['mac']
        if mac in machines_to_add:
            machines[machine['id']] = mac

    logging.info('found machines: %s', machines.values())
    if set(machines.values()) != machines_to_add:
        msg = 'machines %s are missing' % (
            list(machines_to_add - set(machines.values()))
        )
        raise Exception(msg)

    return machines


def _poll_switches(client):
    """poll switches and return the machines discovered on them."""
    status, resp = client.list_switches()
    logging.info('get all switches status: %s resp: %s', status, resp)
    if status >= 400:
        msg = 'failed to get switches'
        raise Exception(msg)

    all_switches = {}
    for switch in resp:
        all_switches[switch['ip']] = switch

    # add the switches that are not registered yet.
    switch_ips = [
        switch_ip for switch_ip in flags.OPTIONS.switch_ips.split(',')
        if switch_ip
    ]
    switch_credential = dict([
        credential.split('=', 1)
        for credential in flags.OPTIONS.switch_credential.split(',')
        if '=' in credential
    ])
    for switch_ip in switch_ips:
        if switch_ip not in all_switches:
            status, resp = client.add_switch(switch_ip, **switch_credential)
            logging.info('add switch %s status: %s resp: %s',
                         switch_ip, status, resp)
            if status >= 400:
                msg = 'failed to add switch %s' % switch_ip
                raise Exception(msg)

            all_switches[switch_ip] = resp
        else:
            logging.info('switch %s is already added', switch_ip)

    remain_retries = flags.OPTIONS.switch_max_retries
    while True:
        time.sleep(flags.OPTIONS.switch_retry_interval)
        for switch_ip, switch in all_switches.items():
            switch_id = switch['id']
            # if the switch is not under_monitoring, wait for the
            # poll switch task to update the switch information and
            # change the switch state.
            logging.info(
                'waiting for the switch %s into under_monitoring',
                switch_ip)
            status, resp = client.get_switch(switch_id)
            logging.info('get switch %s status: %s, resp: %s',
                         switch_ip, status, resp)
            if status >= 400:
                msg = 'failed to get switch %s' % switch_ip
                raise Exception(msg)

            switch = resp
            all_switches[switch_ip] = switch
            if switch['state'] == 'notsupported':
                msg = 'switch %s is not supported' % switch_ip
                raise Exception(msg)
            elif switch['state'] in ['initialized', 'repolling']:
                logging.info('switch %s is not updated', switch_ip)
            elif switch['state'] == 'under_monitoring':
                logging.info('switch %s is ready', switch_ip)

        try:
            return _get_machines(client)
        except Exception:
            logging.error('failed to get all machines')

        if remain_retries > 0:
            for switch_ip, switch in all_switches.items():
                status, resp = client.poll_switch(switch['id'])
                if status >= 400:
                    msg = 'failed to update switch %s' % switch_ip
                    raise Exception(msg)

            remain_retries -= 1
        else:
            msg = 'max retries reached'
            raise Exception(msg)


def _get_adapter(client):
    """get adapter."""
    status, resp = client.list_adapters()
    logging.info(
        'get all adapters status: %s, resp: %s',
        status, resp
    )
    if status >= 400:
        msg = 'failed to get adapters'
        raise Exception(msg)

    os_name_pattern = flags.OPTIONS.adapter_os_name
    os_name_re = re.compile(os_name_pattern)
    target_system_pattern = flags.OPTIONS.adapter_target_system
    target_system_re = re.compile(target_system_pattern)
    flavor_name_pattern = flags.OPTIONS.adapter_flavor
    flavor_re = re.compile(flavor_name_pattern)
    adapter_id = None
    os_id = None
    flavor_id = None
    adapter = None
    for item in resp:
        if target_system_re.match(item['distributed_system_name']):
            adapter = item
            adapter_id = adapter['id']
            break

    if not adapter_id:
        msg = 'no adapter found for %s' % target_system_pattern
        raise Exception(msg)

    for supported_os in adapter['supported_oses']:
        if os_name_re.match(supported_os['name']):
            os_id = supported_os['os_id']
            break

    if not os_id:
        msg = 'no os found for %s' % os_name_pattern
        raise Exception(msg)

    for flavor in adapter['flavors']:
        if flavor_re.match(flavor['name']):
            flavor_id = flavor['id']
            break

    if not flavor_id:
        msg = 'no flavor found for %s' % flavor_name_pattern
        raise Exception(msg)

    logging.info('adapter for deploying a cluster: %s', adapter_id)
    return (adapter_id, os_id, flavor_id)


def _add_subnet(client):
    pass


def _add_cluster(client, adapter_id, os_id, flavor_id, machines):
    """add a cluster."""
    cluster_name = flags.OPTIONS.cluster_name
    status, resp = client.add_cluster(
        cluster_name, adapter_id, os_id, flavor_id)
    logging.info('add cluster %s status: %s, resp: %s',
                 cluster_name, status, resp)
    if status >= 400:
        msg = 'failed to add cluster %s with adapter %s os %s flavor %s' % (
            cluster_name, adapter_id, os_id, flavor_id)
        raise Exception(msg)

    cluster = resp
    cluster_id = cluster['id']

    machines_dict = []
    for machine_id in machines:
        machines_dict.append({
            'machine_id': machine_id
        })

    # add hosts to the cluster.
    status, resp = client.add_hosts_to_cluster(
        cluster_id, machines_dict)
    logging.info('add hosts to cluster %s status: %s, resp: %s',
                 cluster_id, status, resp)
    if status >= 400:
        msg = 'failed to add machines %s to cluster %s' % (
            machines, cluster_name)
        raise Exception(msg)

    host_ids = []
    for host in resp['cluster_hosts']:
        host_ids.append(host['id'])

    logging.info('added hosts in cluster %s: %s', cluster_id, host_ids)
    if len(host_ids) != len(machines):
        msg = 'expected to add machines %s to cluster %s but got hosts %s' % (
            machines, cluster_name, host_ids)
        raise Exception(msg)

    return {cluster_id: host_ids}


def _set_cluster_security(client, cluster_hosts):
    """set cluster security."""
    credentials = [
        credential for credential in flags.OPTIONS.credentials.split(',')
        if ':' in credential
    ]
    logging.info('set cluster security: %s', credentials)
    credential_mapping = {}
    for credential in credentials:
        credential_name, username_and_password = credential.split(':', 1)
        if not credential_name:
            raise Exception('there is no credential name in %s' % credential)

        if not username_and_password:
            raise Exception(
                'there is no username/password in %s' % credential)

        if '=' not in username_and_password:
            raise Exception('there is no = in %s' % username_and_password)

        username, password = username_and_password.split('=', 1)
        if not username or not password:
            raise Exception(
                'there is no username or password in %s' % (
                    username_and_password))

        credential_mapping['%s_username' % credential_name] = username
        credential_mapping['%s_password' % credential_name] = password

    for cluster_id, host_ids in cluster_hosts.items():
        status, resp = client.set_security(
            cluster_id, **credential_mapping)
        logging.info(
            'set security config to cluster %s status: %s, resp: %s',
            cluster_id, status, resp)
        if status >= 400:
            msg = 'failed to set security %s for cluster %s' % (
                credential_mapping, cluster_id)
            raise Exception(msg)


def _set_cluster_networking(client, cluster_hosts):
    """set cluster networking."""
    networking_map = {}
    networkings = [
        network for network in flags.OPTIONS.networking.split(';')
        if '=' in network
    ]
    logging.info('set cluster networking: %s', networkings)
    for networking in networkings:
        networking_name, networking_value = networking.split('=', 1)
        if not networking_name:
            raise Exception(
                'there is no networking name in %s' % networking)

        if networking_name.endswith('_promisc'):
            networking_map[networking_name] = int(networking_value)
        else:
            networking_map[networking_name] = networking_value

    for cluster_id, host_ids in cluster_hosts.items():
        status, resp = client.set_networking(
            cluster_id, **networking_map)
        logging.info(
            'set networking config %s to cluster %s status: %s, resp: %s',
            networking_map, cluster_id, status, resp)
        if status >= 400:
            msg = 'failed to set networking config %s to cluster %s' % (
                networking_map, cluster_id)
            raise Exception(msg)


def _set_cluster_partition(client, cluster_hosts):
    """set partition of each host in cluster."""
    partitions = [
        partition for partition in flags.OPTIONS.partitions.split(',')
        if ':' in partition
    ]
    logging.info('set cluster partition: %s', partitions)
    partition_mapping = {}
    for partition in partitions:
        partition_name, partition_pair = partition.split(':', 1)
        if not partition_name:
            raise Exception(
                'there is no partition name in %s' % partition)

        if not partition_pair:
            raise Exception(
                'there is no partition pair in %s' % partition)

        if '=' not in partition_pair:
            raise Exception(
                'there is no = in %s' % partition_pair)

        partition_type, partition_value = partition_pair.split('=', 1)
        if partition_type == 'percentage':
            partition_value = int(partition_value)
        elif partition_type == 'mbytes':
            partition_value = int(partition_value)
        else:
            raise Exception(
                'unsupported partition type %s' % partition_type)

        partition_mapping[
            '%s_%s' % (partition_name, partition_type)
        ] = partition_value

    for cluster_id, host_ids in cluster_hosts.items():
        status, resp = client.set_partition(
            cluster_id, **partition_mapping)
        logging.info(
            'set partition config %s to cluster %s status: %s, resp: %s',
            partition_mapping, cluster_id, status, resp)
        if status >= 400:
            msg = 'failed to set partition %s to cluster %s' % (
                partition_mapping, cluster_id)
            raise Exception(msg)


def _set_host_config(client, cluster_hosts):
    """set hostname and roles for each host in the clusters."""
    host_configs = []
    for host in flags.OPTIONS.host_roles.split(';'):
        if not host:
            continue

        hostname, roles = host.split('=', 1)
        if hostname:
            roles = [role for role in roles.split(',') if role]

        host_configs.append({
            'hostname': hostname,
            'roles': roles
        })

    total_hosts = 0
    for cluster_id, host_ids in cluster_hosts.items():
        total_hosts += len(host_ids)

    if total_hosts != len(host_configs):
        msg = '%s hosts to assign but got %s host configs' % (
            total_hosts, len(host_configs))
        raise Exception(msg)

    for cluster_id, host_ids in cluster_hosts.items():
        for hostid in host_ids:
            host_config = host_configs.pop(0)
            status, resp = client.update_host_config(
                hostid, **host_config)
            logging.info(
                'set host %s config %s status: %s, resp: %s',
                hostid, host_config, status, resp
            )
            if status >= 400:
                msg = 'failed to set host %s config %s' % (
                    hostid, host_config)
                raise Exception(msg)


def _deploy_clusters(client, cluster_hosts):
    """deploy cluster."""
    for cluster_id, host_ids in cluster_hosts.items():
        status, resp = client.deploy_hosts(cluster_id)
        logging.info(
            'deploy cluster %s status: %s, resp: %s',
            cluster_id, status, resp)
        if status >= 400:
            msg = 'failed to deploy cluster %s' % cluster_id
            raise Exception(msg)


def _get_installing_progress(client, cluster_hosts):
    """get installing progress."""
    timeout = time.time() + 60 * float(flags.OPTIONS.deployment_timeout)
    clusters_progress = {}
    hosts_progress = {}
    install_finished = False
    failed_hosts = {}
    failed_clusters = {}
    while time.time() < timeout:
        found_installing_clusters = False
        found_installing_hosts = False
        for cluster_id, host_ids in cluster_hosts.items():
            for hostid in host_ids:
                if hostid in hosts_progress:
                    continue

                status, resp = client.get_host_installing_progress(hostid)
                logging.info(
                    'get host %s installing progress status: %s, resp: %s',
                    hostid, status, resp)
                if status >= 400:
                    msg = 'failed to get host %s progress' % hostid
                    raise Exception(msg)

                progress = resp['progress']
                if (
                    progress['state'] not in ['UNINITIALIZED', 'INSTALLING']
                    or progress['percentage'] >= 1.0
                ):
                    hosts_progress[hostid] = progress
                    if progress['state'] in ['ERROR']:
                        failed_hosts[hostid] = progress
                else:
                    found_installing_hosts = True

            if cluster_id in clusters_progress:
                continue

            status, resp = client.get_cluster_installing_progress(cluster_id)
            logging.info(
                'get cluster %s installing progress status: %s, resp: %s',
                cluster_id, status, resp)
            if status >= 400:
                msg = 'failed to get cluster %s installing progress' % (
                    cluster_id)
                raise Exception(msg)

            progress = resp['progress']
            if (
                progress['state'] not in ['UNINITIALIZED', 'INSTALLING']
                or progress['percentage'] >= 1.0
            ):
                clusters_progress[cluster_id] = progress
                if progress['state'] in ['ERROR']:
                    failed_clusters[cluster_id] = progress
            else:
                found_installing_clusters = True

        if found_installing_clusters and found_installing_hosts:
            logging.info(
                'there are some clusters/hosts still installing; '
                'sleep %s seconds and retry',
                flags.OPTIONS.progress_update_check_interval)
            time.sleep(float(flags.OPTIONS.progress_update_check_interval))
        else:
            install_finished = True
            logging.info('all clusters/hosts are installed.')
            break

    if not install_finished:
        msg = 'installing %s is not all finished: hosts %s clusters %s' % (
            cluster_hosts, hosts_progress, clusters_progress)
        raise Exception(msg)

    if failed_hosts:
        msg = 'installing hosts failed: %s' % failed_hosts
        raise Exception(msg)

    if failed_clusters:
        msg = 'installing clusters failed: %s' % failed_clusters
        raise Exception(msg)


def _check_dashboard_links(client, cluster_hosts):
    """check the dashboard login page is reachable for each cluster."""
    dashboard_role = flags.OPTIONS.dashboard_role
    dashboard_link_pattern = re.compile(
        flags.OPTIONS.dashboard_link_pattern)
    for cluster_id, host_ids in cluster_hosts.items():
        status, resp = client.get_dashboard_links(cluster_id)
        logging.info(
            'get cluster %s dashboard links status: %s, resp: %s',
            cluster_id, status, resp)
        if status >= 400:
            msg = 'failed to get cluster %s dashboard links' % cluster_id
            raise Exception(msg)

        dashboardlinks = resp['dashboardlinks']
        if dashboard_role not in dashboardlinks:
            msg = 'no dashboard role %s found in %s' % (
                dashboard_role, dashboardlinks)
            raise Exception(msg)

        r = requests.get(dashboardlinks[dashboard_role], verify=False)
        r.raise_for_status()
        match = dashboard_link_pattern.search(r.text)
        if match:
            logging.info(
                'dashboard login page for cluster %s can be downloaded',
                cluster_id)
        else:
            msg = (
                '%s dashboard login page failed to be downloaded\n'
                'the content is:\n%s\n'
            ) % (dashboard_role, r.text)
            raise Exception(msg)


def main():
    flags.init()
    logsetting.init()
    client = _get_client()
    _login(client)
    if flags.OPTIONS.poll_switches:
        machines = _poll_switches(client)
    else:
        machines = _get_machines(client)

    adapter_id, os_id, flavor_id = _get_adapter(client)
    cluster_hosts = _add_cluster(
        client, adapter_id, os_id, flavor_id, machines)
    _set_cluster_security(client, cluster_hosts)
    _set_cluster_networking(client, cluster_hosts)
    _set_cluster_partition(client, cluster_hosts)
    _set_host_config(client, cluster_hosts)
    _deploy_clusters(client, cluster_hosts)
    _get_installing_progress(client, cluster_hosts)
    _check_dashboard_links(client, cluster_hosts)


if __name__ == "__main__":
    main()