Implement 'files', rename old 'files' to 'filelists', implement shell mode, improve output

+ some refactoring/deduplication
+ removed unused functions
+ renamed some functions for clarity
+ moved 'keys' into Node class for access from other modules
+ added get_file_scp to allow wildcards (could not make it work with rsync)
+ added --no-archive option to skip archiving
Author: f3flight
Date:   2016-05-12 02:21:06 +00:00
Parent: 4bd4b591fa
Commit: d9c2acbe33

20 changed files with 199 additions and 146 deletions

View File

@@ -1,4 +1,4 @@
files:
filelists:
by_roles:
fuel: [etc-nailgun, etc-fuel]
ceph-osd: [etc-ceph]

View File

@@ -16,7 +16,7 @@
# under the License.
import argparse
from timmy.nodes import NodeManager
from timmy.nodes import Node, NodeManager
import logging
import sys
import os
@@ -34,33 +34,52 @@ def main(argv=None):
' execution and file'
' collection tool'))
parser.add_argument('-c', '--conf',
help='configuration file')
help='Path to a YAML configuration file.')
parser.add_argument('-o', '--dest-file',
help='output archive file')
help='Path to an output archive file.')
parser.add_argument('-x', '--extended', action='store_true',
help='exec once by role cmdfiles')
parser.add_argument('-e', '--env', help='env id', type=int)
help='Execute extended commands.')
parser.add_argument('-e', '--env', type=int,
help='Env ID. Run only on specific environment.')
parser.add_argument('-m', '--maxthreads', type=int, default=100,
help="maximum simultaneous operations for commands")
help=('Maximum simultaneous nodes for command'
' execution.'))
parser.add_argument('-l', '--logs',
help='collect logs from nodes',
help=('Collect logs from nodes. Logs are not collected'
' by default due to their size.'),
action='store_true', dest='getlogs')
parser.add_argument('-L', '--logs-maxthreads', type=int, default=100,
help="maximum simultaneous log collection operations")
help='Maximum simultaneous nodes for log collection.')
parser.add_argument('--only-logs',
action='store_true',
help='Collect only logs from fuel-node')
help='Only collect logs, do not run commands.')
parser.add_argument('--log-file', default=None,
help='timmy log file')
help='Output file for Timmy log.')
parser.add_argument('--fake-logs',
help="Do not collect logs, only calculate size",
action="store_true")
help='Do not collect logs, only calculate size.',
action='store_true')
parser.add_argument('-d', '--debug',
help="print lots of debugging statements, implies -v",
action="store_true")
help='Be extremely verbose.',
action='store_true')
parser.add_argument('-v', '--verbose',
help="be verbose",
action="store_true")
help='Be verbose.',
action='store_true')
parser.add_argument('-C', '--command',
help=('Enables shell mode. Shell command to'
' execute. For help on shell mode, read'
' timmy/conf.py'))
parser.add_argument('-F', '--file', nargs='+',
help=('Enables shell mode. Files to collect via'
'"scp -r". Result is placed into a folder'
'specified via "outdir" config option.'))
parser.add_argument('-R', '--role', nargs='+',
help=('Run only on the specified role(s). Example:'
' -R compute ceph-osd any-other-role'))
parser.add_argument('--no-archive',
help=('Do not create results archive. By default,'
' an archive with all outputs and files'
' is created every time you run Timmy.'),
action='store_true')
args = parser.parse_args(argv[1:])
loglevel = logging.ERROR
if args.verbose:
@@ -71,19 +90,38 @@ def main(argv=None):
level=loglevel,
format='%(asctime)s %(levelname)s %(message)s')
conf = load_conf(args.conf)
if args.env is not None:
conf['soft_filter']['cluster'] = [args.env]
if args.command or args.file or conf['shell_mode']:
conf['shell_mode'] = True
# config cleanup for shell mode
for k in Node.conf_actionable:
conf[k] = [] if k in Node.conf_appendable else None
for k in conf.keys():  # keys() returns a list in Python 2, so popping below is safe
if k.startswith(Node.conf_match_prefix):
conf.pop(k)
if args.command:
conf[Node.ckey] = [{'stdout': args.command}]
if args.file:
conf[Node.fkey] = args.file
if conf['shell_mode']:
filter = conf['hard_filter']
else:
filter = conf['soft_filter']
if args.role:
filter['roles'] = args.role
if args.env:
filter['cluster'] = [args.env]
main_arc = os.path.join(conf['archives'], 'general.tar.gz')
if args.dest_file:
main_arc = args.dest_file
nm = NodeManager(conf=conf,
extended=args.extended)
if not args.only_logs:
nm.launch_ssh(conf['outdir'], args.maxthreads)
nm.get_conf_files(conf['outdir'], args.maxthreads)
nm.create_archive_general(conf['outdir'],
main_arc,
60)
nm.run_commands(conf['outdir'], args.maxthreads)
nm.get_files(conf['outdir'], args.maxthreads)
if not args.no_archive:
nm.create_archive_general(conf['outdir'],
main_arc,
60)
if args.only_logs or args.getlogs:
lf = '/tmp/timmy-logs.lock'
lock = flock.FLock(lf)
@@ -93,16 +131,24 @@ def main(argv=None):
logging.warning('No logs to collect.')
return
if nm.is_enough_space(conf['archives']):
nm.archive_logs(conf['archives'],
conf['compress_timeout'],
maxthreads=args.logs_maxthreads,
fake=args.fake_logs)
nm.get_logs(conf['archives'],
conf['compress_timeout'],
maxthreads=args.logs_maxthreads,
fake=args.fake_logs)
lock.unlock()
else:
logging.warning('Unable to obtain lock %s, skipping "logs"-part' %
lf)
logging.info("Nodes:\n%s" % nm)
print('Run complete. Node information:')
print(nm)
if conf['shell_mode']:
print('Results:')
for node in nm.nodes.values():
for cmd, path in node.mapcmds.items():
with open(path, 'r') as f:
for line in f.readlines():
print('node-%s: %s' % (node.id, line.rstrip('\n')))
return 0
if __name__ == '__main__':
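To summarize the shell-mode branch above: a hypothetical invocation such as "timmy -C 'uptime' -R controller" leaves the configuration in roughly this state before NodeManager is created (a sketch; the key attributes are defined on the Node class further below):

    conf['shell_mode'] = True
    conf[Node.ckey] = [{'stdout': 'uptime'}]       # one command; its output file is named 'stdout'
    conf['hard_filter']['roles'] = ['controller']  # -R feeds hard_filter in shell mode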

View File

@@ -1,4 +1,4 @@
from tools import load_yaml_file
from tools import load_yaml_file, choose_path
def load_conf(filename):
@@ -13,13 +13,22 @@ def load_conf(filename):
conf['fuelip'] = 'localhost'
conf['outdir'] = '/tmp/timmy/info'
conf['timeout'] = 15
conf['rqdir'] = '/usr/share/timmy/rq'
conf['rqfile'] = '/usr/share/timmy/configs/rq.yaml'
conf['rqdir'] = choose_path('/usr/share/timmy/rq')
conf['rqfile'] = choose_path('/usr/share/timmy/configs/rq.yaml')
conf['compress_timeout'] = 3600
conf['archives'] = '/tmp/timmy/archives'
conf['cmds_archive'] = ''
conf['logs'] = {'path': '/var/log',
'exclude': '[-_]\d{8}$|atop[-_]|\.gz$'}
'''Shell mode - only run what was specified via command line.
Skip actionable conf fields (see timmy/nodes.py -> Node.conf_actionable);
Skip rqfile import;
Skip any overrides (see Node.conf_match_prefix);
Skip 'once' overrides (see Node.conf_once_prefix);
Skip Fuel node;
Print command execution results. Files and outputs will also be in a
place specified by conf['outdir'].'''
conf['shell_mode'] = False
if filename:
conf_extra = load_yaml_file(filename)
conf.update(**conf_extra)
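Since conf.update(**conf_extra) is a shallow merge, a user file overrides top-level keys wholesale. A sketch assuming a hypothetical my-conf.yaml that sets 'outdir: /tmp/timmy/run1' and 'shell_mode: true':

    conf = load_conf('my-conf.yaml')
    conf['outdir']      # '/tmp/timmy/run1' (overridden)
    conf['shell_mode']  # True
    # Caveat of the shallow merge: overriding 'logs' replaces the whole
    # dict, dropping the default 'exclude' pattern unless it is repeated.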

View File

@@ -29,20 +29,23 @@ import tools
from tools import w_list
from copy import deepcopy
ckey = 'cmds'
skey = 'scripts'
fkey = 'files'
lkey = 'logs'
class Node(object):
conf_appendable = [lkey, ckey, skey, fkey]
conf_keep_default = [skey, ckey, fkey]
ckey = 'cmds'
skey = 'scripts'
fkey = 'files'
flkey = 'filelists'
lkey = 'logs'
conf_actionable = [lkey, ckey, skey, fkey, flkey]
conf_appendable = [lkey, ckey, skey, fkey, flkey]
conf_keep_default = [skey, ckey, fkey, flkey]
conf_once_prefix = 'once_'
conf_match_prefix = 'by_'
conf_default_key = '__default'
conf_priority_section = conf_match_prefix + 'id'
print_template = '{0:<14} {1:<3} {2:<16} {3:<18} {4:<10} {5:<30}'
print_template += ' {6:<6} {7}'
def __init__(self, id, mac, cluster, roles, os_platform,
online, status, ip, conf):
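For reference, the rename at the top of this diff ties into these keys: 'filelists' is Node.flkey and is listed in conf_appendable, while the 'by_roles' block carries the conf_match_prefix ('by_'), so a node holding the ceph-osd role should end up with 'etc-ceph' appended to its filelists. A sketch of the expected effect (hypothetical node, not an exact trace):

    node = nm.nodes['10.20.0.5']          # hypothetical ceph-osd node
    assert 'etc-ceph' in node.filelists   # appended via by_roles matching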
@@ -55,7 +58,9 @@ class Node(object):
self.status = status
self.ip = ip
self.files = []
self.filelists = []
self.cmds = []
self.scripts = []
self.data = {}
self.logsize = 0
self.mapcmds = {}
@@ -67,11 +72,11 @@ class Node(object):
if not self.filtered_out:
my_id = self.id
else:
my_id = '#' + str(self.id)
templ = '{0} {1.cluster} {1.ip} {1.mac} {1.os_platform} '
templ += '{2} {1.online} {1.status}'
return templ.format(my_id, self, ','.join(self.roles))
my_id = str(self.id) + ' [skipped]'
pt = self.print_template
return pt.format(my_id, self.cluster, self.ip, self.mac,
self.os_platform, ','.join(self.roles),
str(self.online), self.status)
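The replacement __str__ emits one fixed-width row per node; an illustrative header plus one row (values hypothetical):

    pt = Node.print_template
    print(pt.format('node-id', 'env', 'ip/hostname', 'mac', 'os',
                    'roles', 'online', 'status'))
    print(pt.format('1', '1', '10.20.0.3', '08:00:27:aa:bb:cc', 'ubuntu',
                    'controller', 'True', 'ready'))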
def apply_conf(self, conf, clean=True):
@@ -132,39 +137,11 @@ class Node(object):
setattr(self, f, [])
r_apply(conf, p, p_s, c_a, k_d, overridden, d, clean=clean)
def checkos(self, filename):
bname = str(os.path.basename(filename))
logging.debug('check os: node: %s, filename %s' %
(self.id, filename))
if bname[0] == '.':
if self.os_platform in bname:
logging.debug('os %s in filename %s' %
(self.os_platform, filename))
return True
else:
return False
return True
def exclude_non_os(self):
for key in self.files.keys():
self.files[key] = [f for f in self.files[key] if self.checkos(f)]
def add_files(self, dirname, key, ds):
for role in self.roles:
if ('once-by-role' in ds[key] and
role in ds[key]['once-by-role'].keys()):
for f in ds[key]['once-by-role'][role]:
self.files[key] += [os.path.join(dirname, key,
'once-by-role', role, f)]
self.files[key] = sorted(set(self.files[key]))
logging.debug('add files:\nnode: %s, key: %s, files:\n%s' %
(self.id, key, self.files[key]))
def exec_cmd(self, odir='info', fake=False, ok_codes=[0, ]):
sn = 'node-%s' % self.id
cl = 'cluster-%s' % self.cluster
logging.debug('%s/%s/%s/%s' % (odir, ckey, cl, sn))
ddir = os.path.join(odir, ckey, cl, sn)
logging.debug('%s/%s/%s/%s' % (odir, Node.ckey, cl, sn))
ddir = os.path.join(odir, Node.ckey, cl, sn)
if self.cmds:
tools.mdir(ddir)
for c in self.cmds:
@@ -190,11 +167,11 @@ class Node(object):
except:
logging.error("exec_cmd: can't write to file %s" %
dfile)
ddir = os.path.join(odir, skey, cl, sn)
ddir = os.path.join(odir, Node.skey, cl, sn)
if self.scripts:
tools.mdir(ddir)
for scr in self.scripts:
f = os.path.join(self.rqdir, skey, scr)
f = os.path.join(self.rqdir, Node.skey, scr)
logging.info('node:%s(%s), exec: %s' % (self.id, self.ip, f))
if not fake:
outs, errs, code = tools.ssh_node(ip=self.ip,
@@ -235,32 +212,43 @@ class Node(object):
(self.id, self.ip, cmd, code, errs))
def get_files(self, odir='info', timeout=15):
logging.info('node:%s(%s), filelist: %s' %
(self.id, self.ip, fkey))
def check_code(code):
if code != 0:
logging.warning("get_files: node: %s, ip: %s, "
"code: %s, error message: %s" %
(self.id, self.ip, code, errs))
logging.info('get_files: node: %s, IP: %s' % (self.id, self.ip))
sn = 'node-%s' % self.id
cl = 'cluster-%s' % self.cluster
ddir = os.path.join(odir, fkey, cl, sn)
ddir = os.path.join(odir, Node.fkey, cl, sn)
tools.mdir(ddir)
data = ''
for f in self.files:
fname = os.path.join(self.rqdir, 'files', f)
try:
with open(fname, 'r') as df:
for line in df:
if not line.isspace() and line[0] != '#':
data += line
except:
logging.error('could not read file: %s' % fname)
logging.debug('node: %s, data:\n%s' % (self.id, data))
outs, errs, code = tools.get_files_rsync(ip=self.ip,
data=data,
ssh_opts=self.ssh_opts,
dpath=ddir,
timeout=self.timeout)
if code != 0:
logging.warning("get_files: node: %s, ip: %s, "
"code: %s, error message: %s" %
(self.id, self.ip, code, errs))
if self.shell_mode:
for file in self.files:
outs, errs, code = tools.get_file_scp(ip=self.ip,
file=file,
ddir=ddir,
recursive=True)
check_code(code)
else:
data = ''
for f in self.filelists:
fname = os.path.join(self.rqdir, Node.flkey, f)
try:
with open(fname, 'r') as df:
for line in df:
if not line.isspace() and line[0] != '#':
data += line
except:
logging.error('could not read file: %s' % fname)
data += '\n'.join(self.files)
logging.debug('node: %s, data:\n%s' % (self.id, data))
outs, errs, code = tools.get_files_rsync(ip=self.ip,
data=data,
ssh_opts=self.ssh_opts,
dpath=ddir,
timeout=self.timeout)
check_code(code)
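A note on the split above: rsync --files-from treats each line as a literal path and performs no glob expansion, whereas scp hands the single-quoted remote path to the remote shell, which expands wildcards — that is what the new get_file_scp helper provides. A hypothetical shell-mode call matching the loop:

    outs, errs, code = tools.get_file_scp(ip='10.20.0.3',
                                          file='/var/log/nailgun/*.log',
                                          ddir='/tmp/timmy/info/files',
                                          recursive=True)
    check_code(code)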
def logs_populate(self, timeout=5):
@@ -317,7 +305,8 @@ class NodeManager(object):
def __init__(self, conf, extended=False, filename=None):
self.conf = conf
self.rqdir = conf['rqdir'].rstrip('/')
self.import_rq()
if not conf['shell_mode']:
self.import_rq()
if (not os.path.exists(self.rqdir)):
logging.error("directory %s doesn't exist" % (self.rqdir))
sys.exit(1)
@@ -336,19 +325,31 @@ class NodeManager(object):
else:
self.njdata = json.loads(self.get_nodes())
self.nodes_init()
# apply soft-filter on all nodes
for node in self.nodes.values():
if not self.filter(node, self.conf['soft_filter']):
node.filtered_out = True
self.get_version()
self.nodes_get_release()
self.nodes_reapply_conf()
self.conf_assign_once()
if extended:
'''TO-DO: load smth like extended.yaml
do additional apply_conf(clean=False) with this yaml.
Move some stuff from rq.yaml to extended.yaml'''
pass
if not conf['shell_mode']:
self.nodes_reapply_conf()
self.conf_assign_once()
if extended:
'''TO-DO: load smth like extended.yaml
do additional apply_conf(clean=False) with this yaml.
Move some stuff from rq.yaml to extended.yaml'''
pass
def __str__(self):
s = "#node-id, cluster, admin-ip, mac, os, roles, online, status\n"
return s+'\n'.join([str(n) for n in self.sorted_nodes()])
pt = Node.print_template
header = pt.format('node-id', 'env', 'ip/hostname', 'mac', 'os',
'roles', 'online', 'status') + '\n'
nodestrings = []
# f3flight: I only did this to avoid printing Fuel when it is hard-filtered
for n in self.sorted_nodes():
if self.filter(n, self.conf['hard_filter']):
nodestrings.append(str(n))
return header + '\n'.join(nodestrings)
def sorted_nodes(self):
s = [n for n in sorted(self.nodes.values(), key=lambda x: x.id)]
@@ -412,6 +413,9 @@ class NodeManager(object):
online=True,
ip=self.fuelip,
conf=self.conf)
# soft-skip Fuel if it is hard-filtered
if not self.filter(fuelnode, self.conf['hard_filter']):
fuelnode.filtered_out = True
self.nodes = {self.fuelip: fuelnode}
nodes_json, err, code = tools.ssh_node(ip=self.fuelip,
command=fuel_node_cmd,
@@ -440,8 +444,6 @@ class NodeManager(object):
params[key] = node_data[key]
node = Node(**params)
if self.filter(node, self.conf['hard_filter']):
if not self.filter(node, self.conf['soft_filter']):
node.filtered_out = True
self.nodes[node.ip] = node
def get_version(self):
@@ -506,8 +508,9 @@ class NodeManager(object):
def filter(self, node, node_filter):
f = node_filter
if node.id == 0 and f == self.conf['hard_filter']:
return True
# soft-skip Fuel node if shell mode is enabled
if node.id == 0 and self.conf['shell_mode']:
return False
else:
fnames = [k for k in f if hasattr(node, k) and f[k]]
checks = []
@@ -517,7 +520,8 @@ class NodeManager(object):
checks.append(not set(node_v).isdisjoint(filter_v))
return all(checks)
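Filter semantics in brief: every attribute named in the filter must intersect the node's value, with scalars wrapped into lists by tools.w_list. A self-contained restatement (a sketch, simplified from the loop above):

    def w_list(val):
        # wrap a non-list value into a list (simplified copy of tools.w_list)
        return val if type(val) is list else [val]

    def matches(node_attrs, node_filter):
        checks = []
        for k, v in node_filter.items():
            if k in node_attrs and v:
                checks.append(not set(w_list(node_attrs[k])).isdisjoint(w_list(v)))
        return all(checks)

    matches({'roles': ['compute'], 'cluster': 1},
            {'roles': ['compute', 'ceph-osd'], 'cluster': [1]})  # True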
def launch_ssh(self, odir='info', timeout=15, fake=False, maxthreads=100):
def run_commands(self, odir='info', timeout=15, fake=False,
maxthreads=100):
lock = flock.FLock('/tmp/timmy-cmds.lock')
if not lock.lock():
logging.warning('Unable to obtain lock, skipping "cmds"-part')
@@ -596,8 +600,7 @@ class NodeManager(object):
speed = defspeed
return speed
def archive_logs(self, outdir, timeout,
fake=False, maxthreads=10, speed=100):
def get_logs(self, outdir, timeout, fake=False, maxthreads=10, speed=100):
if fake:
logging.info('get_logs: skip creating archives (fake: %s)' % fake)
return
@@ -643,7 +646,7 @@ class NodeManager(object):
except:
logging.error("archive_logs: can't delete file %s" % tfile)
def get_conf_files(self, odir=fkey, timeout=15):
def get_files(self, odir=Node.fkey, timeout=15):
lock = flock.FLock('/tmp/timmy-files.lock')
if not lock.lock():
logging.warning('Unable to obtain lock, skipping "files"-part')

View File

@@ -146,14 +146,21 @@ def get_dir_structure(rootdir):
return dir
def choose_path(filename):
if os.path.exists(filename):
return filename
elif '/' in filename:
return filename.split('/')[-1]
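choose_path lets a source checkout run without installed data files by falling back to the bare file name; a sketch of both outcomes:

    choose_path('/usr/share/timmy/configs/rq.yaml')
    # -> '/usr/share/timmy/configs/rq.yaml' when the package is installed,
    # -> 'rq.yaml' otherwise (expected to exist in the working directory)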
def load_yaml_file(filename):
try:
with open(filename, 'r') as f:
with open(choose_path(filename), 'r') as f:
return yaml.load(f)
except IOError as e:
logging.error("load_conf: I/O error(%s): file: %s; message: %s" %
logging.error("load_conf: I/O error(%s): file: %s; msg: %s" %
(e.errno, e.filename, e.strerror))
sys.exit(1)
return e
except ValueError:
logging.error("load_conf: Could not convert data")
sys.exit(1)
@@ -161,10 +168,6 @@ def load_yaml_file(filename):
logging.error("load_conf: Could not parse %s:\n%s" %
(filename, str(e)))
sys.exit(1)
except:
logging.error("load_conf: Unexpected error: %s" %
sys.exc_info()[0])
sys.exit(1)
def mdir(directory):
@@ -177,7 +180,7 @@ def mdir(directory):
sys.exit(3)
def launch_cmd(command, timeout):
def launch_cmd(command, timeout, input=None):
def _timeout_terminate(pid):
try:
os.kill(pid, 15)
@@ -188,13 +191,14 @@ def launch_cmd(command, timeout):
logging.info('launch_cmd: command %s' % command)
p = subprocess.Popen(command,
shell=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
timeout_killer = None
try:
timeout_killer = threading.Timer(timeout, _timeout_terminate, [p.pid])
timeout_killer.start()
outs, errs = p.communicate()
outs, errs = p.communicate(input=input)
except:
try:
p.kill()
@@ -240,8 +244,7 @@ def ssh_node(ip, command='', ssh_opts=[], env_vars=[], timeout=15,
cmd += ' > "' + outputfile + '"'
cmd = ("trap 'kill $pid' 15; " +
"trap 'kill $pid' 2; " + cmd + '&:; pid=$!; wait $!')
outs, errs, code = launch_cmd(cmd, timeout)
return outs, errs, code
return launch_cmd(cmd, timeout)
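With the new input= parameter, callers feed stdin through communicate() instead of managing Popen themselves; this is what lets get_files_rsync below collapse into a single launch_cmd call. A minimal self-contained sketch:

    outs, errs, code = launch_cmd("grep -c '^' -", 10, input='a\nb\nc\n')
    # outs == '3\n' -- three lines counted from stdin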
def get_files_rsync(ip, data, ssh_opts, dpath, timeout=15):
@@ -260,31 +263,23 @@ def get_files_rsync(ip, data, ssh_opts, dpath, timeout=15):
logging.debug("command:%s\ndata:\n%s" % (cmd, data))
if data == '':
return cmd, '', 127
p = subprocess.Popen(cmd,
shell=True,
stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
try:
outs, errs = p.communicate(input=data)
except:
p.kill()
outs, errs = p.communicate()
logging.error("ip: %s, command: %s err: %s, returned: %s" %
(ip, cmd, errs, p.returncode))
return launch_cmd(cmd, timeout, input=data)
logging.debug("ip: %s, ssh return: err:%s\nouts:%s\ncode:%s" %
(ip, errs, outs, p.returncode))
logging.info("ip: %s, ssh return: err:%s\ncode:%s" %
(ip, errs, p.returncode))
return outs, errs, p.returncode
def get_file_scp(ip, file, ddir, timeout=600, recursive=False):
ddir = ddir.rstrip('/') + '/'
if '/' in file.lstrip('/'):
subpath = ddir + file.lstrip('/')[:file.rfind('/')-1]
mdir(subpath)
r = '-r ' if recursive else ''
cmd = "timeout '%s' scp %s'%s':'%s' '%s'" % (timeout, r, ip, file, ddir)
return launch_cmd(cmd, timeout)
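A usage sketch with hypothetical values; the remote path is single-quoted so any wildcard expansion happens on the remote side:

    get_file_scp(ip='10.20.0.3', file='/etc/nova/nova.conf',
                 ddir='/tmp/timmy/info/files')
    # builds: timeout '600' scp '10.20.0.3':'/etc/nova/nova.conf' '/tmp/timmy/info/files/'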
def free_space(destdir, timeout):
cmd = ("df %s --block-size K 2> /dev/null"
" | tail -n 1 | awk '{print $2}' | sed 's/K//g'") % (destdir)
outs, errs, code = launch_cmd(cmd, timeout)
return outs, errs, code
return launch_cmd(cmd, timeout)
# wrap non-list into list