diff --git a/rq.yaml b/rq.yaml index 9f3812a..a2d19d5 100644 --- a/rq.yaml +++ b/rq.yaml @@ -1,4 +1,4 @@ -files: +filelists: by_roles: fuel: [etc-nailgun, etc-fuel] ceph-osd: [etc-ceph] diff --git a/rq/files/etc-apt b/rq/filelists/etc-apt similarity index 100% rename from rq/files/etc-apt rename to rq/filelists/etc-apt diff --git a/rq/files/etc-ceph b/rq/filelists/etc-ceph similarity index 100% rename from rq/files/etc-ceph rename to rq/filelists/etc-ceph diff --git a/rq/files/etc-ceph-controller b/rq/filelists/etc-ceph-controller similarity index 100% rename from rq/files/etc-ceph-controller rename to rq/filelists/etc-ceph-controller diff --git a/rq/files/etc-cinder b/rq/filelists/etc-cinder similarity index 100% rename from rq/files/etc-cinder rename to rq/filelists/etc-cinder diff --git a/rq/files/etc-fuel b/rq/filelists/etc-fuel similarity index 100% rename from rq/files/etc-fuel rename to rq/filelists/etc-fuel diff --git a/rq/files/etc-glance b/rq/filelists/etc-glance similarity index 100% rename from rq/files/etc-glance rename to rq/filelists/etc-glance diff --git a/rq/files/etc-haproxy b/rq/filelists/etc-haproxy similarity index 100% rename from rq/files/etc-haproxy rename to rq/filelists/etc-haproxy diff --git a/rq/files/etc-heat b/rq/filelists/etc-heat similarity index 100% rename from rq/files/etc-heat rename to rq/filelists/etc-heat diff --git a/rq/files/etc-keystone b/rq/filelists/etc-keystone similarity index 100% rename from rq/files/etc-keystone rename to rq/filelists/etc-keystone diff --git a/rq/files/etc-libvirt b/rq/filelists/etc-libvirt similarity index 100% rename from rq/files/etc-libvirt rename to rq/filelists/etc-libvirt diff --git a/rq/files/etc-mysql b/rq/filelists/etc-mysql similarity index 100% rename from rq/files/etc-mysql rename to rq/filelists/etc-mysql diff --git a/rq/files/etc-nailgun b/rq/filelists/etc-nailgun similarity index 100% rename from rq/files/etc-nailgun rename to rq/filelists/etc-nailgun diff --git a/rq/files/etc-neutron 
b/rq/filelists/etc-neutron similarity index 100% rename from rq/files/etc-neutron rename to rq/filelists/etc-neutron diff --git a/rq/files/etc-nova b/rq/filelists/etc-nova similarity index 100% rename from rq/files/etc-nova rename to rq/filelists/etc-nova diff --git a/rq/files/yum b/rq/filelists/yum similarity index 100% rename from rq/files/yum rename to rq/filelists/yum diff --git a/timmy/cli.py b/timmy/cli.py index 30659fb..cb27241 100755 --- a/timmy/cli.py +++ b/timmy/cli.py @@ -16,7 +16,7 @@ # under the License. import argparse -from timmy.nodes import NodeManager +from timmy.nodes import Node, NodeManager import logging import sys import os @@ -34,33 +34,52 @@ def main(argv=None): ' execution and file' ' collection tool')) parser.add_argument('-c', '--conf', - help='configuration file') + help='Path to a YAML configuration file.') parser.add_argument('-o', '--dest-file', - help='output archive file') + help='Path to an output archive file.') parser.add_argument('-x', '--extended', action='store_true', - help='exec once by role cmdfiles') - parser.add_argument('-e', '--env', help='env id', type=int) + help='Execute extended commands.') + parser.add_argument('-e', '--env', type=int, + help='Env ID. Run only on specific environment.') parser.add_argument('-m', '--maxthreads', type=int, default=100, - help="maximum simultaneous operations for commands") + help=('Maximum simultaneous nodes for command' + ' execution.')) parser.add_argument('-l', '--logs', - help='collect logs from nodes', + help=('Collect logs from nodes. 
Logs are not collected' + ' by default due to their size.'), action='store_true', dest='getlogs') parser.add_argument('-L', '--logs-maxthreads', type=int, default=100, - help="maximum simultaneous log collection operations") + help='Maximum simultaneous nodes for log collection.') parser.add_argument('--only-logs', action='store_true', - help='Collect only logs from fuel-node') + help='Only collect logs, do not run commands.') parser.add_argument('--log-file', default=None, - help='timmy log file') + help='Output file for Timmy log.') parser.add_argument('--fake-logs', - help="Do not collect logs, only calculate size", - action="store_true") + help='Do not collect logs, only calculate size.', + action='store_true') parser.add_argument('-d', '--debug', - help="print lots of debugging statements, implies -v", - action="store_true") + help='Be extremely verbose.', + action='store_true') parser.add_argument('-v', '--verbose', - help="be verbose", - action="store_true") + help='Be verbose.', + action='store_true') + parser.add_argument('-C', '--command', + help=('Enables shell mode. Shell command to' + ' execute. For help on shell mode, read' + ' timmy/conf.py')) + parser.add_argument('-F', '--file', nargs='+', + help=('Enables shell mode. Files to collect via' + ' "scp -r". Result is placed into a folder' + ' specified via "outdir" config option.')) + parser.add_argument('-R', '--role', nargs='+', + help=('Run only on the specified role(s). Example:' + ' -R compute ceph-osd any-other-role')) + parser.add_argument('--no-archive', + help=('Do not create results archive. 
By default,' + ' an archive with all outputs and files' + ' is created every time you run Timmy.'), + action='store_true') args = parser.parse_args(argv[1:]) loglevel = logging.ERROR if args.verbose: @@ -71,19 +90,38 @@ def main(argv=None): level=loglevel, format='%(asctime)s %(levelname)s %(message)s') conf = load_conf(args.conf) - if args.env is not None: - conf['soft_filter']['cluster'] = [args.env] + if args.command or args.file or conf['shell_mode']: + conf['shell_mode'] = True + # config cleanup for shell mode + for k in Node.conf_actionable: + conf[k] = [] if k in Node.conf_appendable else None + for k in list(conf): + if k.startswith(Node.conf_match_prefix): + conf.pop(k) + if args.command: + conf[Node.ckey] = [{'stdout': args.command}] + if args.file: + conf[Node.fkey] = args.file + if conf['shell_mode']: + filter = conf['hard_filter'] + else: + filter = conf['soft_filter'] + if args.role: + filter['roles'] = args.role + if args.env is not None: + filter['cluster'] = [args.env] main_arc = os.path.join(conf['archives'], 'general.tar.gz') if args.dest_file: main_arc = args.dest_file nm = NodeManager(conf=conf, extended=args.extended) if not args.only_logs: - nm.launch_ssh(conf['outdir'], args.maxthreads) - nm.get_conf_files(conf['outdir'], args.maxthreads) - nm.create_archive_general(conf['outdir'], - main_arc, - 60) + nm.run_commands(conf['outdir'], maxthreads=args.maxthreads) + nm.get_files(conf['outdir'], args.maxthreads) + if not args.no_archive: + nm.create_archive_general(conf['outdir'], + main_arc, + 60) if args.only_logs or args.getlogs: lf = '/tmp/timmy-logs.lock' lock = flock.FLock(lf) @@ -93,16 +131,24 @@ def main(argv=None): logging.warning('No logs to collect.') return if nm.is_enough_space(conf['archives']): - nm.archive_logs(conf['archives'], - conf['compress_timeout'], - maxthreads=args.logs_maxthreads, - fake=args.fake_logs) + nm.get_logs(conf['archives'], + conf['compress_timeout'], + maxthreads=args.logs_maxthreads, + fake=args.fake_logs) lock.unlock() else: 
logging.warning('Unable to obtain lock %s, skipping "logs"-part' % lf) logging.info("Nodes:\n%s" % nm) + print('Run complete. Node information:') print(nm) + if conf['shell_mode']: + print('Results:') + for node in nm.nodes.values(): + for cmd, path in node.mapcmds.items(): + with open(path, 'r') as f: + for line in f.readlines(): + print('node-%s: %s' % (node.id, line.rstrip('\n'))) return 0 if __name__ == '__main__': diff --git a/timmy/conf.py b/timmy/conf.py index 38ed6ee..55fc591 100644 --- a/timmy/conf.py +++ b/timmy/conf.py @@ -1,4 +1,4 @@ -from tools import load_yaml_file +from tools import load_yaml_file, choose_path def load_conf(filename): @@ -13,13 +13,22 @@ def load_conf(filename): conf['fuelip'] = 'localhost' conf['outdir'] = '/tmp/timmy/info' conf['timeout'] = 15 - conf['rqdir'] = '/usr/share/timmy/rq' - conf['rqfile'] = '/usr/share/timmy/configs/rq.yaml' + conf['rqdir'] = choose_path('/usr/share/timmy/rq') + conf['rqfile'] = choose_path('/usr/share/timmy/configs/rq.yaml') conf['compress_timeout'] = 3600 conf['archives'] = '/tmp/timmy/archives' conf['cmds_archive'] = '' conf['logs'] = {'path': '/var/log', 'exclude': '[-_]\d{8}$|atop[-_]|\.gz$'} + '''Shell mode - only run what was specified via command line. + Skip actionable conf fields (see timmy/nodes.py -> Node.conf_actionable); + Skip rqfile import; + Skip any overrides (see Node.conf_match_prefix); + Skip 'once' overrides (see Node.conf_once_prefix); + Skip Fuel node; + Print command execution results. 
Files and outputs will also be in a + place specified by conf['outdir'].''' + conf['shell_mode'] = False if filename: conf_extra = load_yaml_file(filename) conf.update(**conf_extra) diff --git a/timmy/nodes.py b/timmy/nodes.py index 397819a..094720c 100644 --- a/timmy/nodes.py +++ b/timmy/nodes.py @@ -29,20 +29,23 @@ import tools from tools import w_list from copy import deepcopy -ckey = 'cmds' -skey = 'scripts' -fkey = 'files' -lkey = 'logs' - class Node(object): - conf_appendable = [lkey, ckey, skey, fkey] - conf_keep_default = [skey, ckey, fkey] + ckey = 'cmds' + skey = 'scripts' + fkey = 'files' + flkey = 'filelists' + lkey = 'logs' + conf_actionable = [lkey, ckey, skey, fkey, flkey] + conf_appendable = [lkey, ckey, skey, fkey, flkey] + conf_keep_default = [skey, ckey, fkey, flkey] conf_once_prefix = 'once_' conf_match_prefix = 'by_' conf_default_key = '__default' conf_priority_section = conf_match_prefix + 'id' + print_template = '{0:<14} {1:<3} {2:<16} {3:<18} {4:<10} {5:<30}' + print_template += ' {6:<6} {7}' def __init__(self, id, mac, cluster, roles, os_platform, online, status, ip, conf): @@ -55,7 +58,9 @@ class Node(object): self.status = status self.ip = ip self.files = [] + self.filelists = [] self.cmds = [] + self.scripts = [] self.data = {} self.logsize = 0 self.mapcmds = {} @@ -67,11 +72,11 @@ class Node(object): if not self.filtered_out: my_id = self.id else: - my_id = '#' + str(self.id) - - templ = '{0} {1.cluster} {1.ip} {1.mac} {1.os_platform} ' - templ += '{2} {1.online} {1.status}' - return templ.format(my_id, self, ','.join(self.roles)) + my_id = str(self.id) + ' [skipped]' + pt = self.print_template + return pt.format(my_id, self.cluster, self.ip, self.mac, + self.os_platform, ','.join(self.roles), + str(self.online), self.status) def apply_conf(self, conf, clean=True): @@ -132,39 +137,11 @@ class Node(object): setattr(self, f, []) r_apply(conf, p, p_s, c_a, k_d, overridden, d, clean=clean) - def checkos(self, filename): - bname = 
str(os.path.basename(filename)) - logging.debug('check os: node: %s, filename %s' % - (self.id, filename)) - if bname[0] == '.': - if self.os_platform in bname: - logging.debug('os %s in filename %s' % - (self.os_platform, filename)) - return True - else: - return False - return True - - def exclude_non_os(self): - for key in self.files.keys(): - self.files[key] = [f for f in self.files[key] if self.checkos(f)] - - def add_files(self, dirname, key, ds): - for role in self.roles: - if ('once-by-role' in ds[key] and - role in ds[key]['once-by-role'].keys()): - for f in ds[key]['once-by-role'][role]: - self.files[key] += [os.path.join(dirname, key, - 'once-by-role', role, f)] - self.files[key] = sorted(set(self.files[key])) - logging.debug('add files:\nnode: %s, key: %s, files:\n%s' % - (self.id, key, self.files[key])) - def exec_cmd(self, odir='info', fake=False, ok_codes=[0, ]): sn = 'node-%s' % self.id cl = 'cluster-%s' % self.cluster - logging.debug('%s/%s/%s/%s' % (odir, ckey, cl, sn)) - ddir = os.path.join(odir, ckey, cl, sn) + logging.debug('%s/%s/%s/%s' % (odir, Node.ckey, cl, sn)) + ddir = os.path.join(odir, Node.ckey, cl, sn) if self.cmds: tools.mdir(ddir) for c in self.cmds: @@ -190,11 +167,11 @@ class Node(object): except: logging.error("exec_cmd: can't write to file %s" % dfile) - ddir = os.path.join(odir, skey, cl, sn) + ddir = os.path.join(odir, Node.skey, cl, sn) if self.scripts: tools.mdir(ddir) for scr in self.scripts: - f = os.path.join(self.rqdir, skey, scr) + f = os.path.join(self.rqdir, Node.skey, scr) logging.info('node:%s(%s), exec: %s' % (self.id, self.ip, f)) if not fake: outs, errs, code = tools.ssh_node(ip=self.ip, @@ -235,32 +212,43 @@ class Node(object): (self.id, self.ip, cmd, code, errs)) def get_files(self, odir='info', timeout=15): - logging.info('node:%s(%s), filelist: %s' % - (self.id, self.ip, fkey)) + def check_code(code): + if code != 0: + logging.warning("get_files: node: %s, ip: %s, " + "code: %s, error message: %s" % + 
(self.id, self.ip, code, errs)) + + logging.info('get_files: node: %s, IP: %s' % (self.id, self.ip)) sn = 'node-%s' % self.id cl = 'cluster-%s' % self.cluster - ddir = os.path.join(odir, fkey, cl, sn) + ddir = os.path.join(odir, Node.fkey, cl, sn) tools.mdir(ddir) - data = '' - for f in self.files: - fname = os.path.join(self.rqdir, 'files', f) - try: - with open(fname, 'r') as df: - for line in df: - if not line.isspace() and line[0] != '#': - data += line - except: - logging.error('could not read file: %s' % fname) - logging.debug('node: %s, data:\n%s' % (self.id, data)) - outs, errs, code = tools.get_files_rsync(ip=self.ip, - data=data, - ssh_opts=self.ssh_opts, - dpath=ddir, - timeout=self.timeout) - if code != 0: - logging.warning("get_files: node: %s, ip: %s, " - "code: %s, error message: %s" % - (self.id, self.ip, code, errs)) + if self.shell_mode: + for file in self.files: + outs, errs, code = tools.get_file_scp(ip=self.ip, + file=file, + ddir=ddir, + recursive=True) + check_code(code) + else: + data = '' + for f in self.filelists: + fname = os.path.join(self.rqdir, Node.flkey, f) + try: + with open(fname, 'r') as df: + for line in df: + if not line.isspace() and line[0] != '#': + data += line + except: + logging.error('could not read file: %s' % fname) + data += '\n'.join(self.files) + logging.debug('node: %s, data:\n%s' % (self.id, data)) + outs, errs, code = tools.get_files_rsync(ip=self.ip, + data=data, + ssh_opts=self.ssh_opts, + dpath=ddir, + timeout=self.timeout) + check_code(code) def logs_populate(self, timeout=5): @@ -317,7 +305,8 @@ class NodeManager(object): def __init__(self, conf, extended=False, filename=None): self.conf = conf self.rqdir = conf['rqdir'].rstrip('/') - self.import_rq() + if not conf['shell_mode']: + self.import_rq() if (not os.path.exists(self.rqdir)): logging.error("directory %s doesn't exist" % (self.rqdir)) sys.exit(1) @@ -336,19 +325,31 @@ class NodeManager(object): else: self.njdata = json.loads(self.get_nodes()) 
self.nodes_init() + # apply soft-filter on all nodes + for node in self.nodes.values(): + if not self.filter(node, self.conf['soft_filter']): + node.filtered_out = True self.get_version() self.nodes_get_release() - self.nodes_reapply_conf() - self.conf_assign_once() - if extended: - '''TO-DO: load smth like extended.yaml - do additional apply_conf(clean=False) with this yaml. - Move some stuff from rq.yaml to extended.yaml''' - pass + if not conf['shell_mode']: + self.nodes_reapply_conf() + self.conf_assign_once() + if extended: + '''TO-DO: load smth like extended.yaml + do additional apply_conf(clean=False) with this yaml. + Move some stuff from rq.yaml to extended.yaml''' + pass def __str__(self): - s = "#node-id, cluster, admin-ip, mac, os, roles, online, status\n" - return s+'\n'.join([str(n) for n in self.sorted_nodes()]) + pt = Node.print_template + header = pt.format('node-id', 'env', 'ip/hostname', 'mac', 'os', + 'roles', 'online', 'status') + '\n' + nodestrings = [] + # f3flight: I only did this to not print Fuel when it is hard-filtered + for n in self.sorted_nodes(): + if self.filter(n, self.conf['hard_filter']): + nodestrings.append(str(n)) + return header + '\n'.join(nodestrings) def sorted_nodes(self): s = [n for n in sorted(self.nodes.values(), key=lambda x: x.id)] @@ -412,6 +413,9 @@ class NodeManager(object): online=True, ip=self.fuelip, conf=self.conf) + # soft-skip Fuel if it is hard-filtered + if not self.filter(fuelnode, self.conf['hard_filter']): + fuelnode.filtered_out = True self.nodes = {self.fuelip: fuelnode} nodes_json, err, code = tools.ssh_node(ip=self.fuelip, command=fuel_node_cmd, @@ -440,8 +444,6 @@ class NodeManager(object): params[key] = node_data[key] node = Node(**params) if self.filter(node, self.conf['hard_filter']): - if not self.filter(node, self.conf['soft_filter']): - node.filtered_out = True self.nodes[node.ip] = node def get_version(self): @@ -506,8 +508,9 @@ class NodeManager(object): def filter(self, node, node_filter): 
f = node_filter - if node.id == 0 and f == self.conf['hard_filter']: - return True + # soft-skip Fuel node if shell mode is enabled + if node.id == 0 and self.conf['shell_mode']: + return False else: fnames = [k for k in f if hasattr(node, k) and f[k]] checks = [] @@ -517,7 +520,8 @@ class NodeManager(object): checks.append(not set(node_v).isdisjoint(filter_v)) return all(checks) - def launch_ssh(self, odir='info', timeout=15, fake=False, maxthreads=100): + def run_commands(self, odir='info', timeout=15, fake=False, + maxthreads=100): lock = flock.FLock('/tmp/timmy-cmds.lock') if not lock.lock(): logging.warning('Unable to obtain lock, skipping "cmds"-part') @@ -596,8 +600,7 @@ class NodeManager(object): speed = defspeed return speed - def archive_logs(self, outdir, timeout, - fake=False, maxthreads=10, speed=100): + def get_logs(self, outdir, timeout, fake=False, maxthreads=10, speed=100): if fake: logging.info('archive_logs:skip creating archives(fake:%s)' % fake) return @@ -643,7 +646,7 @@ class NodeManager(object): except: logging.error("archive_logs: can't delete file %s" % tfile) - def get_conf_files(self, odir=fkey, timeout=15): + def get_files(self, odir=Node.fkey, timeout=15): lock = flock.FLock('/tmp/timmy-files.lock') if not lock.lock(): logging.warning('Unable to obtain lock, skipping "files"-part') diff --git a/timmy/tools.py b/timmy/tools.py index b745737..cf6bcf3 100644 --- a/timmy/tools.py +++ b/timmy/tools.py @@ -146,14 +146,21 @@ def get_dir_structure(rootdir): return dir +def choose_path(filename): + if os.path.exists(filename): + return filename + # fall back to the same file name in the working directory + return os.path.basename(filename) + + def load_yaml_file(filename): try: - with open(filename, 'r') as f: + with open(choose_path(filename), 'r') as f: return yaml.load(f) except IOError as e: - logging.error("load_conf: I/O error(%s): file: %s; message: %s" % + logging.error("load_conf: I/O error(%s): file: %s; msg: %s" % (e.errno, e.filename, e.strerror)) - sys.exit(1) + return e 
except ValueError: logging.error("load_conf: Could not convert data") sys.exit(1) @@ -161,10 +168,6 @@ def load_yaml_file(filename): logging.error("load_conf: Could not parse %s:\n%s" % (filename, str(e))) sys.exit(1) - except: - logging.error("load_conf: Unexpected error: %s" % - sys.exc_info()[0]) - sys.exit(1) def mdir(directory): @@ -177,7 +180,7 @@ def mdir(directory): sys.exit(3) -def launch_cmd(command, timeout): +def launch_cmd(command, timeout, input=None): def _timeout_terminate(pid): try: os.kill(pid, 15) @@ -188,13 +191,14 @@ def launch_cmd(command, timeout): logging.info('launch_cmd: command %s' % command) p = subprocess.Popen(command, shell=True, + stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) timeout_killer = None try: timeout_killer = threading.Timer(timeout, _timeout_terminate, [p.pid]) timeout_killer.start() - outs, errs = p.communicate() + outs, errs = p.communicate(input=input) except: try: p.kill() @@ -240,8 +244,7 @@ def ssh_node(ip, command='', ssh_opts=[], env_vars=[], timeout=15, cmd += ' > "' + outputfile + '"' cmd = ("trap 'kill $pid' 15; " + "trap 'kill $pid' 2; " + cmd + '&:; pid=$!; wait $!') - outs, errs, code = launch_cmd(cmd, timeout) - return outs, errs, code + return launch_cmd(cmd, timeout) def get_files_rsync(ip, data, ssh_opts, dpath, timeout=15): @@ -260,31 +263,23 @@ def get_files_rsync(ip, data, ssh_opts, dpath, timeout=15): logging.debug("command:%s\ndata:\n%s" % (cmd, data)) if data == '': return cmd, '', 127 - p = subprocess.Popen(cmd, - shell=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - try: - outs, errs = p.communicate(input=data) - except: - p.kill() - outs, errs = p.communicate() - logging.error("ip: %s, command: %s err: %s, returned: %s" % - (ip, cmd, errs, p.returncode)) + return launch_cmd(cmd, timeout, input=data) - logging.debug("ip: %s, ssh return: err:%s\nouts:%s\ncode:%s" % - (ip, errs, outs, p.returncode)) - logging.info("ip: %s, ssh return: 
err:%s\ncode:%s" % - (ip, errs, p.returncode)) - return outs, errs, p.returncode + + +def get_file_scp(ip, file, ddir, timeout=600, recursive=False): + ddir = ddir.rstrip('/') + '/' + if '/' in file.lstrip('/'): + subpath = ddir + os.path.dirname(file.lstrip('/')) + mdir(subpath) + r = '-r ' if recursive else '' + cmd = "timeout '%s' scp %s'%s':'%s' '%s'" % (timeout, r, ip, file, ddir) + return launch_cmd(cmd, timeout) def free_space(destdir, timeout): cmd = ("df %s --block-size K 2> /dev/null" " | tail -n 1 | awk '{print $2}' | sed 's/K//g'") % (destdir) - outs, errs, code = launch_cmd(cmd, timeout) - return outs, errs, code + return launch_cmd(cmd, timeout) # wrap non-list into list