commit
485a69d91e
36
config.yaml
36
config.yaml
@ -1,23 +1,29 @@
|
|||||||
ssh_opts:
|
ssh_opts:
|
||||||
- -oConnectTimeout=2
|
- '-oConnectTimeout=2'
|
||||||
- -oStrictHostKeyChecking=no
|
- '-oStrictHostKeyChecking=no'
|
||||||
- -oUserKnownHostsFile=/dev/null
|
- '-oUserKnownHostsFile=/dev/null'
|
||||||
- -oLogLevel=error
|
- '-oLogLevel=error'
|
||||||
- -lroot
|
- '-lroot'
|
||||||
- -oBatchMode=yes
|
- '-oBatchMode=yes'
|
||||||
env_vars:
|
env_vars:
|
||||||
- OPENRC=/root/openrc
|
- 'OPENRC=/root/openrc'
|
||||||
- IPTABLES_STR="iptables -nvL"
|
- 'IPTABLES_STR="iptables -nvL"'
|
||||||
fuelip: 127.0.0.1
|
fuelip: '127.0.0.1'
|
||||||
rqdir: ./rq
|
rqdir: './rq'
|
||||||
soft_filter:
|
soft_filter:
|
||||||
status: ['ready']
|
status: ['ready']
|
||||||
timeout: 15
|
timeout: 15
|
||||||
compress_timeout: 3600
|
compress_timeout: 3600
|
||||||
log_files:
|
log_path: '/var/log'
|
||||||
path: /var/log
|
log_filter:
|
||||||
filter:
|
|
||||||
default:
|
|
||||||
include: '(.)*'
|
include: '(.)*'
|
||||||
exclude: '[-_]\d{8}$|atop[-_]|\.gz$'
|
exclude: '[-_]\d{8}$|atop[-_]|\.gz$'
|
||||||
|
# by_role:
|
||||||
|
# contrail:
|
||||||
|
# log_filter:
|
||||||
|
# include: 'contrail'
|
||||||
|
# by_node_id:
|
||||||
|
# 3:
|
||||||
|
# env_vars:
|
||||||
|
# OPENRC: '/root/openrc'
|
||||||
|
# IPTABLES_STR: 'iptables -L'
|
||||||
|
5
setup.py
5
setup.py
@ -17,7 +17,4 @@ setup(name='timmy',
|
|||||||
packages=["timmy"],
|
packages=["timmy"],
|
||||||
data_files=rqfiles,
|
data_files=rqfiles,
|
||||||
include_package_data=True,
|
include_package_data=True,
|
||||||
entry_points = {
|
entry_points={'console_scripts': ['timmy=timmy.cli:main']})
|
||||||
'console_scripts': ['timmy = timmy.cli:main']
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
22
timmy/cli.py
22
timmy/cli.py
@ -16,15 +16,16 @@
|
|||||||
# under the License.
|
# under the License.
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import timmy
|
|
||||||
from timmy import nodes
|
from timmy import nodes
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
from timmy.conf import Conf
|
from timmy.conf import Conf
|
||||||
from timmy import flock
|
from timmy import flock
|
||||||
|
from timmy.tools import interrupt_wrapper
|
||||||
|
|
||||||
|
|
||||||
|
@interrupt_wrapper
|
||||||
def main(argv=None):
|
def main(argv=None):
|
||||||
if argv is None:
|
if argv is None:
|
||||||
argv = sys.argv
|
argv = sys.argv
|
||||||
@ -42,10 +43,12 @@ def main(argv=None):
|
|||||||
parser.add_argument('-l', '--logs',
|
parser.add_argument('-l', '--logs',
|
||||||
help='collect logs from nodes',
|
help='collect logs from nodes',
|
||||||
action='store_true', dest='getlogs')
|
action='store_true', dest='getlogs')
|
||||||
|
parser.add_argument('-L', '--logs-maxthreads', type=int, default=100,
|
||||||
|
help="maximum simultaneous log collection operations")
|
||||||
parser.add_argument('--only-logs',
|
parser.add_argument('--only-logs',
|
||||||
action='store_true',
|
action='store_true',
|
||||||
help='Collect only logs from fuel-node')
|
help='Collect only logs from fuel-node')
|
||||||
parser.add_argument('--log-file',
|
parser.add_argument('--log-file', default=None,
|
||||||
help='timmy log file')
|
help='timmy log file')
|
||||||
parser.add_argument('--fake-logs',
|
parser.add_argument('--fake-logs',
|
||||||
help="Do not collect logs, only calculate size",
|
help="Do not collect logs, only calculate size",
|
||||||
@ -63,12 +66,7 @@ def main(argv=None):
|
|||||||
loglevel = logging.DEBUG
|
loglevel = logging.DEBUG
|
||||||
else:
|
else:
|
||||||
loglevel = logging.INFO
|
loglevel = logging.INFO
|
||||||
if args.log_file:
|
logging.basicConfig(filename=args.log_file,
|
||||||
logfile = args.log_file
|
|
||||||
else:
|
|
||||||
logfile = None
|
|
||||||
logging.basicConfig(
|
|
||||||
filename=logfile,
|
|
||||||
level=loglevel,
|
level=loglevel,
|
||||||
format='%(asctime)s %(levelname)s %(message)s')
|
format='%(asctime)s %(levelname)s %(message)s')
|
||||||
config = Conf()
|
config = Conf()
|
||||||
@ -92,15 +90,19 @@ def main(argv=None):
|
|||||||
lf = '/tmp/timmy-logs.lock'
|
lf = '/tmp/timmy-logs.lock'
|
||||||
lock = flock.FLock(lf)
|
lock = flock.FLock(lf)
|
||||||
if lock.lock():
|
if lock.lock():
|
||||||
|
try:
|
||||||
n.get_node_file_list()
|
n.get_node_file_list()
|
||||||
n.calculate_log_size()
|
n.calculate_log_size()
|
||||||
if n.is_enough_space(config.archives):
|
if n.is_enough_space(config.archives):
|
||||||
n.create_log_archives(config.archives,
|
n.archive_logs(config.archives,
|
||||||
config.compress_timeout,
|
config.compress_timeout,
|
||||||
|
maxthreads=args.logs_maxthreads,
|
||||||
fake=args.fake_logs)
|
fake=args.fake_logs)
|
||||||
|
finally:
|
||||||
lock.unlock()
|
lock.unlock()
|
||||||
else:
|
else:
|
||||||
logging.warning('Unable to obtain lock %s, skipping "logs"-part' % lf)
|
logging.warning('Unable to obtain lock %s, skipping "logs"-part' %
|
||||||
|
lf)
|
||||||
logging.info("Nodes:\n%s" % n)
|
logging.info("Nodes:\n%s" % n)
|
||||||
print(n)
|
print(n)
|
||||||
return 0
|
return 0
|
||||||
|
@ -20,9 +20,9 @@ class Conf(object):
|
|||||||
compress_timeout = 3600
|
compress_timeout = 3600
|
||||||
archives = '/tmp/timmy/archives'
|
archives = '/tmp/timmy/archives'
|
||||||
cmds_archive = ''
|
cmds_archive = ''
|
||||||
log_files = {}
|
log_path = '/var/log'
|
||||||
log_files['filter'] = {'default': {'include': "(.)*", 'exclude': '[-_]\d{8}$|atop[-_]|\.gz$'}}
|
log_filter = {'include': '',
|
||||||
log_files['path'] = '/var/log/'
|
'exclude': '[-_]\d{8}$|atop[-_]|\.gz$'}
|
||||||
|
|
||||||
def __init__(self, **entries):
|
def __init__(self, **entries):
|
||||||
self.__dict__.update(entries)
|
self.__dict__.update(entries)
|
||||||
@ -38,16 +38,19 @@ class Conf(object):
|
|||||||
conf = yaml.load(f)
|
conf = yaml.load(f)
|
||||||
return Conf(**conf)
|
return Conf(**conf)
|
||||||
except IOError as e:
|
except IOError as e:
|
||||||
logging.error("load_conf: I/O error(%s): %s" % (e.errno, e.strerror))
|
logging.error("load_conf: I/O error(%s): %s" %
|
||||||
|
(e.errno, e.strerror))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
logging.error("load_conf: Could not convert data")
|
logging.error("load_conf: Could not convert data")
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
except yaml.parser.ParserError as e:
|
except yaml.parser.ParserError as e:
|
||||||
logging.error("load_conf: Could not parse %s:\n%s" % (filename, str(e)))
|
logging.error("load_conf: Could not parse %s:\n%s" %
|
||||||
|
(filename, str(e)))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
except:
|
except:
|
||||||
logging.error("load_conf: Unexpected error: %s" % sys.exc_info()[0])
|
logging.error("load_conf: Unexpected error: %s" %
|
||||||
|
sys.exc_info()[0])
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
@ -46,7 +46,8 @@ class FLock:
|
|||||||
self.lockfd = os.open(self.lockfile,
|
self.lockfd = os.open(self.lockfile,
|
||||||
os.O_TRUNC | os.O_CREAT | os.O_RDWR)
|
os.O_TRUNC | os.O_CREAT | os.O_RDWR)
|
||||||
|
|
||||||
# Acquire exclusive lock on the file, but don't block waiting for it
|
# Acquire exclusive lock on the file,
|
||||||
|
# but don't block waiting for it
|
||||||
fcntl.flock(self.lockfd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
fcntl.flock(self.lockfd, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||||
|
|
||||||
# Writing to file is pointless, nobody can see it
|
# Writing to file is pointless, nobody can see it
|
||||||
@ -54,7 +55,8 @@ class FLock:
|
|||||||
|
|
||||||
return True
|
return True
|
||||||
except (OSError, IOError), e:
|
except (OSError, IOError), e:
|
||||||
# Lock cannot be acquired is okay, everything else reraise exception
|
# Lock cannot be acquired is okay,
|
||||||
|
# everything else reraise exception
|
||||||
if e.errno in (errno.EACCES, errno.EAGAIN):
|
if e.errno in (errno.EACCES, errno.EAGAIN):
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
@ -67,7 +69,7 @@ class FLock:
|
|||||||
os.unlink(self.lockfile)
|
os.unlink(self.lockfile)
|
||||||
# Just in case, let's not leak file descriptors
|
# Just in case, let's not leak file descriptors
|
||||||
os.close(self.lockfd)
|
os.close(self.lockfd)
|
||||||
except (OSError, IOError), e:
|
except (OSError, IOError):
|
||||||
# Ignore error destroying lock file. See class doc about how
|
# Ignore error destroying lock file. See class doc about how
|
||||||
# lockfile can be erased and everything still works normally.
|
# lockfile can be erased and everything still works normally.
|
||||||
pass
|
pass
|
||||||
|
438
timmy/nodes.py
438
timmy/nodes.py
@ -24,9 +24,8 @@ import json
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
import threading
|
|
||||||
import re
|
import re
|
||||||
from tools import *
|
import tools
|
||||||
|
|
||||||
ckey = 'cmds'
|
ckey = 'cmds'
|
||||||
fkey = 'files'
|
fkey = 'files'
|
||||||
@ -36,6 +35,9 @@ varlogdir = '/var/log'
|
|||||||
|
|
||||||
class Node(object):
|
class Node(object):
|
||||||
|
|
||||||
|
override_by_id = ['ssh_opts', 'env_vars', 'log_path', 'log_filter']
|
||||||
|
aggregate_by_role = ['log_path', 'log_filter']
|
||||||
|
|
||||||
def __init__(self, node_id, mac, cluster, roles, os_platform,
|
def __init__(self, node_id, mac, cluster, roles, os_platform,
|
||||||
online, status, ip, conf):
|
online, status, ip, conf):
|
||||||
self.node_id = node_id
|
self.node_id = node_id
|
||||||
@ -51,28 +53,33 @@ class Node(object):
|
|||||||
self.logsize = 0
|
self.logsize = 0
|
||||||
self.flogs = {}
|
self.flogs = {}
|
||||||
self.mapcmds = {}
|
self.mapcmds = {}
|
||||||
|
self.logs = {}
|
||||||
self.set_conf(conf)
|
self.set_conf(conf)
|
||||||
|
|
||||||
def set_conf(self, conf):
|
def override_conf(self, conf):
|
||||||
logging.info(conf.ssh_opts)
|
for field in Node.aggregate_by_role:
|
||||||
self.ssh_opts = " ".join(conf.ssh_opts)
|
for role in self.roles:
|
||||||
self.env_vars = " ".join(conf.env_vars)
|
|
||||||
self.log_files = conf.log_files
|
|
||||||
self.timeout = conf.timeout
|
|
||||||
try:
|
try:
|
||||||
conf.by_node_id
|
getattr(self, field).append(conf.by_role[self.role][field])
|
||||||
except:
|
except:
|
||||||
return
|
pass
|
||||||
if self.node_id in conf.by_node_id:
|
for field in Node.override_by_id:
|
||||||
if 'ssh_opts' in conf.by_node_id[self.node_id]:
|
try:
|
||||||
self.ssh_opts = " ".join(conf.by_node_id[self.node_id]['ssh_opts'])
|
setattr(self, field, conf.by_node_id[self.node_id][field])
|
||||||
if 'env_vars' in conf.by_node_id[self.node_id]:
|
except:
|
||||||
self.env_vars = " ".join(conf.by_node_id[self.node_id]['env_vars'])
|
pass
|
||||||
if 'log_files' in conf.by_node_id[self.node_id]:
|
|
||||||
self.log_files = conf.by_node_id[self.node_id]['log_files']
|
def set_conf(self, conf):
|
||||||
|
self.ssh_opts = conf.ssh_opts
|
||||||
|
self.env_vars = conf.env_vars
|
||||||
|
self.log_path = list([conf.log_path])
|
||||||
|
self.log_filter = list([conf.log_filter])
|
||||||
|
self.timeout = conf.timeout
|
||||||
|
self.override_conf(conf)
|
||||||
|
|
||||||
def set_files(self, dirname, key, ds, version):
|
def set_files(self, dirname, key, ds, version):
|
||||||
files = []
|
files = []
|
||||||
|
dfs = 'default'
|
||||||
for role in self.roles:
|
for role in self.roles:
|
||||||
if 'by-role' in ds[key] and role in ds[key]['by-role'].keys():
|
if 'by-role' in ds[key] and role in ds[key]['by-role'].keys():
|
||||||
for f in ds[key]['by-role'][role]:
|
for f in ds[key]['by-role'][role]:
|
||||||
@ -86,9 +93,9 @@ class Node(object):
|
|||||||
for f in ds[key]['by-os'][self.os_platform].keys():
|
for f in ds[key]['by-os'][self.os_platform].keys():
|
||||||
files += [os.path.join(dirname, key, 'by-os',
|
files += [os.path.join(dirname, key, 'by-os',
|
||||||
self.os_platform, f)]
|
self.os_platform, f)]
|
||||||
if 'default' in ds[key] and 'default' in ds[key]['default']:
|
if dfs in ds[key] and dfs in ds[key][dfs]:
|
||||||
for f in ds[key]['default']['default'].keys():
|
for f in ds[key][dfs][dfs].keys():
|
||||||
files += [os.path.join(dirname, key, 'default', 'default', f)]
|
files += [os.path.join(dirname, key, dfs, dfs, f)]
|
||||||
self.files[key] = sorted(set(files))
|
self.files[key] = sorted(set(files))
|
||||||
logging.debug('set_files:\nkey: %s, node: %s, file_list: %s' %
|
logging.debug('set_files:\nkey: %s, node: %s, file_list: %s' %
|
||||||
(key, self.node_id, self.files[key]))
|
(key, self.node_id, self.files[key]))
|
||||||
@ -112,7 +119,8 @@ class Node(object):
|
|||||||
|
|
||||||
def add_files(self, dirname, key, ds):
|
def add_files(self, dirname, key, ds):
|
||||||
for role in self.roles:
|
for role in self.roles:
|
||||||
if 'once-by-role' in ds[key] and role in ds[key]['once-by-role'].keys():
|
if ('once-by-role' in ds[key] and
|
||||||
|
role in ds[key]['once-by-role'].keys()):
|
||||||
for f in ds[key]['once-by-role'][role]:
|
for f in ds[key]['once-by-role'][role]:
|
||||||
self.files[key] += [os.path.join(dirname, key,
|
self.files[key] += [os.path.join(dirname, key,
|
||||||
'once-by-role', role, f)]
|
'once-by-role', role, f)]
|
||||||
@ -125,11 +133,11 @@ class Node(object):
|
|||||||
cl = 'cluster-%s' % self.cluster
|
cl = 'cluster-%s' % self.cluster
|
||||||
logging.debug('%s/%s/%s/%s' % (odir, label, cl, sn))
|
logging.debug('%s/%s/%s/%s' % (odir, label, cl, sn))
|
||||||
ddir = os.path.join(odir, label, cl, sn)
|
ddir = os.path.join(odir, label, cl, sn)
|
||||||
mdir(ddir)
|
tools.mdir(ddir)
|
||||||
for f in self.files[label]:
|
for f in self.files[label]:
|
||||||
logging.info('node:%s(%s), exec: %s' % (self.node_id, self.ip, f))
|
logging.info('node:%s(%s), exec: %s' % (self.node_id, self.ip, f))
|
||||||
if not fake:
|
if not fake:
|
||||||
outs, errs, code = ssh_node(ip=self.ip,
|
outs, errs, code = tools.ssh_node(ip=self.ip,
|
||||||
filename=f,
|
filename=f,
|
||||||
ssh_opts=self.ssh_opts,
|
ssh_opts=self.ssh_opts,
|
||||||
env_vars=self.env_vars,
|
env_vars=self.env_vars,
|
||||||
@ -154,7 +162,7 @@ class Node(object):
|
|||||||
def exec_simple_cmd(self, cmd, infile, outfile, timeout=15, fake=False):
|
def exec_simple_cmd(self, cmd, infile, outfile, timeout=15, fake=False):
|
||||||
logging.info('node:%s(%s), exec: %s' % (self.node_id, self.ip, cmd))
|
logging.info('node:%s(%s), exec: %s' % (self.node_id, self.ip, cmd))
|
||||||
if not fake:
|
if not fake:
|
||||||
outs, errs, code = ssh_node(ip=self.ip,
|
outs, errs, code = tools.ssh_node(ip=self.ip,
|
||||||
command=cmd,
|
command=cmd,
|
||||||
ssh_opts=self.ssh_opts,
|
ssh_opts=self.ssh_opts,
|
||||||
env_vars=self.env_vars,
|
env_vars=self.env_vars,
|
||||||
@ -171,7 +179,7 @@ class Node(object):
|
|||||||
(self.node_id, self.ip, label))
|
(self.node_id, self.ip, label))
|
||||||
cmd = 'du -b %s' % self.data[label].replace('\n', ' ')
|
cmd = 'du -b %s' % self.data[label].replace('\n', ' ')
|
||||||
logging.info('node: %s, logs du-cmd: %s' % (self.node_id, cmd))
|
logging.info('node: %s, logs du-cmd: %s' % (self.node_id, cmd))
|
||||||
outs, errs, code = ssh_node(ip=self.ip,
|
outs, errs, code = tools.ssh_node(ip=self.ip,
|
||||||
command=cmd,
|
command=cmd,
|
||||||
sshopts=sshopts,
|
sshopts=sshopts,
|
||||||
sshvars='',
|
sshvars='',
|
||||||
@ -194,14 +202,14 @@ class Node(object):
|
|||||||
logging.info("node: %s, ip: %s, size: %s" %
|
logging.info("node: %s, ip: %s, size: %s" %
|
||||||
(self.node_id, self.ip, self.logsize))
|
(self.node_id, self.ip, self.logsize))
|
||||||
|
|
||||||
def get_files(self, label, sshopts, odir='info', timeout=15):
|
def get_files(self, label, odir='info', timeout=15):
|
||||||
logging.info('node:%s(%s), filelist: %s' %
|
logging.info('node:%s(%s), filelist: %s' %
|
||||||
(self.node_id, self.ip, label))
|
(self.node_id, self.ip, label))
|
||||||
sn = 'node-%s' % self.node_id
|
sn = 'node-%s' % self.node_id
|
||||||
cl = 'cluster-%s' % self.cluster
|
cl = 'cluster-%s' % self.cluster
|
||||||
ddir = os.path.join(odir, label, cl, sn)
|
ddir = os.path.join(odir, label, cl, sn)
|
||||||
mdir(ddir)
|
tools.mdir(ddir)
|
||||||
outs, errs, code = get_files_rsync(ip=self.ip,
|
outs, errs, code = tools.get_files_rsync(ip=self.ip,
|
||||||
data=self.data[label],
|
data=self.data[label],
|
||||||
ssh_opts=self.ssh_opts,
|
ssh_opts=self.ssh_opts,
|
||||||
dpath=ddir,
|
dpath=ddir,
|
||||||
@ -224,95 +232,24 @@ class Node(object):
|
|||||||
logging.debug('node: %s, key: %s, data:\n%s' %
|
logging.debug('node: %s, key: %s, data:\n%s' %
|
||||||
(self.node_id, key, self.data[key]))
|
(self.node_id, key, self.data[key]))
|
||||||
|
|
||||||
def apply_include_filter(self, lfilter):
|
def logs_filter(self):
|
||||||
logging.info('apply_include_filter: node: %s, filter: %s' % (self.node_id, lfilter))
|
result = {}
|
||||||
flogs = {}
|
for re_pair in self.log_filter:
|
||||||
if 'include' in lfilter and lfilter['include'] is not None:
|
for f, s in self.logs.items():
|
||||||
for f in self.dulogs.splitlines():
|
if (('include' not in re_pair or
|
||||||
try:
|
re.search(re_pair['include'], f)) and
|
||||||
if ('include' in lfilter and re.search(lfilter['include'], f)):
|
('exclude' not in re_pair or
|
||||||
flogs[f.split("\t")[1]] = int(f.split("\t")[0])
|
not re.search(re_pair['exclude'], f))):
|
||||||
else:
|
result[f] = s
|
||||||
logging.debug("filter %s by %s" % (f, lfilter))
|
self.logs = result
|
||||||
except re.error as e:
|
|
||||||
logging.error('logs_include_filter: filter: %s, str: %s, re.error: %s' %
|
|
||||||
(lfilter, f, str(e)))
|
|
||||||
sys.exit(5)
|
|
||||||
|
|
||||||
self.flogs.update(flogs)
|
def logs_populate(self, timeout=5):
|
||||||
return True
|
got_logs = False
|
||||||
else:
|
for path in self.log_path:
|
||||||
return False
|
|
||||||
|
|
||||||
def apply_exclude_filter(self, lfilter):
|
|
||||||
logging.info('apply_exclude_filter: node: %s, filter: %s' % (self.node_id, lfilter))
|
|
||||||
rflogs = []
|
|
||||||
if 'exclude' in lfilter and lfilter['exclude'] is None:
|
|
||||||
return True
|
|
||||||
if 'exclude' in lfilter and lfilter['exclude'] is not None:
|
|
||||||
for f in self.flogs:
|
|
||||||
try:
|
|
||||||
if re.search(lfilter['exclude'], f):
|
|
||||||
rflogs.append(f)
|
|
||||||
logging.info('logs_exclude_filter: %s' % f)
|
|
||||||
except re.error as e:
|
|
||||||
logging.error('logs_include_filter: filter: %s, str: %s, re.error: %s' %
|
|
||||||
(lfilter, f, str(e)))
|
|
||||||
sys.exit(5)
|
|
||||||
for f in rflogs:
|
|
||||||
logging.debug('apply_exclude_filter: node: %s remove file: %s from log list' % (self.node_id, f ))
|
|
||||||
self.flogs.pop(f, None)
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
|
|
||||||
def logs_filter(self, filterconf):
|
|
||||||
brstr = 'by_role'
|
|
||||||
flogs = {}
|
|
||||||
logging.info('logs_filter: node: %s, filter: %s' % (self.node_id, filterconf))
|
|
||||||
bynodeidinc = False
|
|
||||||
bynodeidexc = False
|
|
||||||
# need to check the following logic:
|
|
||||||
if 'by_node_id' in filterconf and self.node_id in filterconf['by_node_id']:
|
|
||||||
if self.apply_include_filter(filterconf['by_node_id'][self.node_id]):
|
|
||||||
bynodeidinc = True
|
|
||||||
if self.apply_exclude_filter(filterconf['by_node_id'][self.node_id]):
|
|
||||||
bynodeidexc = True
|
|
||||||
if bynodeidinc:
|
|
||||||
return
|
|
||||||
if bynodeidexc:
|
|
||||||
return
|
|
||||||
byrole = False
|
|
||||||
if brstr in filterconf:
|
|
||||||
for role in self.roles:
|
|
||||||
if role in filterconf[brstr].keys():
|
|
||||||
logging.info('logs_filter: apply filter for role %s' % role)
|
|
||||||
byrole = True
|
|
||||||
if self.apply_include_filter(filterconf[brstr][role]):
|
|
||||||
byrole = True
|
|
||||||
if not byrole:
|
|
||||||
if 'default' in filterconf:
|
|
||||||
self.apply_include_filter(filterconf['default'])
|
|
||||||
else:
|
|
||||||
# unexpected
|
|
||||||
logging.warning('default log filter is not defined')
|
|
||||||
self.flogs = {}
|
|
||||||
byrole = False
|
|
||||||
if brstr in filterconf:
|
|
||||||
for role in self.roles:
|
|
||||||
if role in filterconf[brstr].keys():
|
|
||||||
logging.info('logs_filter: apply filter for role %s' % role)
|
|
||||||
if self.apply_exclude_filter(filterconf[brstr][role]):
|
|
||||||
byrole = True
|
|
||||||
if not byrole:
|
|
||||||
if 'default' in filterconf:
|
|
||||||
logging.info('logs_filter: apply default exclude filter')
|
|
||||||
self.apply_exclude_filter(filterconf['default'])
|
|
||||||
|
|
||||||
def log_size_from_find(self, path, sshopts, timeout=5):
|
|
||||||
cmd = ("find '%s' -type f -exec du -b {} +" % (path))
|
cmd = ("find '%s' -type f -exec du -b {} +" % (path))
|
||||||
logging.info('log_size_from_find: node: %s, logs du-cmd: %s' % (self.node_id, cmd))
|
logging.info('logs_populate: node: %s, logs du-cmd: %s' %
|
||||||
outs, errs, code = ssh_node(ip=self.ip,
|
(self.node_id, cmd))
|
||||||
|
outs, errs, code = tools.ssh_node(ip=self.ip,
|
||||||
command=cmd,
|
command=cmd,
|
||||||
ssh_opts=self.ssh_opts,
|
ssh_opts=self.ssh_opts,
|
||||||
env_vars='',
|
env_vars='',
|
||||||
@ -321,11 +258,15 @@ class Node(object):
|
|||||||
logging.error("node: %s, ip: %s, command: %s, "
|
logging.error("node: %s, ip: %s, command: %s, "
|
||||||
"timeout code: %s, error message: %s" %
|
"timeout code: %s, error message: %s" %
|
||||||
(self.node_id, self.ip, cmd, code, errs))
|
(self.node_id, self.ip, cmd, code, errs))
|
||||||
self.dulogs = ""
|
break
|
||||||
return False
|
if len(outs):
|
||||||
self.dulogs = outs
|
got_logs = True
|
||||||
logging.info('log_size_from_find: dulogs: %s' % (self.dulogs))
|
for line in outs.split('\n'):
|
||||||
return True
|
if '\t' in line:
|
||||||
|
size, filename = line.split('\t')
|
||||||
|
self.logs[filename] = int(size)
|
||||||
|
logging.debug('logs_populate: logs: %s' % (self.logs))
|
||||||
|
return got_logs
|
||||||
|
|
||||||
def print_files(self):
|
def print_files(self):
|
||||||
for k in self.files.keys():
|
for k in self.files.keys():
|
||||||
@ -349,14 +290,14 @@ class Nodes(object):
|
|||||||
"""Class nodes """
|
"""Class nodes """
|
||||||
|
|
||||||
def __init__(self, cluster, extended, conf, filename=None):
|
def __init__(self, cluster, extended, conf, filename=None):
|
||||||
import_subprocess()
|
|
||||||
self.dirname = conf.rqdir.rstrip('/')
|
self.dirname = conf.rqdir.rstrip('/')
|
||||||
if (not os.path.exists(self.dirname)):
|
if (not os.path.exists(self.dirname)):
|
||||||
logging.error("directory %s doesn't exist" % (self.dirname))
|
logging.error("directory %s doesn't exist" % (self.dirname))
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
self.files = get_dir_structure(conf.rqdir)[os.path.basename(self.dirname)]
|
dn = os.path.basename(self.dirname)
|
||||||
|
self.files = tools.get_dir_structure(conf.rqdir)[dn]
|
||||||
if (conf.fuelip is None) or (conf.fuelip == ""):
|
if (conf.fuelip is None) or (conf.fuelip == ""):
|
||||||
logging.error('Nodes: looks like fuelip is not set(%s)' % conf.fuelip)
|
logging.error('looks like fuelip is not set(%s)' % conf.fuelip)
|
||||||
sys.exit(7)
|
sys.exit(7)
|
||||||
self.fuelip = conf.fuelip
|
self.fuelip = conf.fuelip
|
||||||
self.conf = conf
|
self.conf = conf
|
||||||
@ -375,7 +316,6 @@ class Nodes(object):
|
|||||||
self.load_nodes(conf)
|
self.load_nodes(conf)
|
||||||
self.get_version()
|
self.get_version()
|
||||||
|
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
s = "#node-id, cluster, admin-ip, mac, os, roles, online, status\n"
|
s = "#node-id, cluster, admin-ip, mac, os, roles, online, status\n"
|
||||||
for node in sorted(self.nodes.values(), key=lambda x: x.node_id):
|
for node in sorted(self.nodes.values(), key=lambda x: x.node_id):
|
||||||
@ -397,7 +337,7 @@ class Nodes(object):
|
|||||||
online=True,
|
online=True,
|
||||||
ip=self.fuelip,
|
ip=self.fuelip,
|
||||||
conf=conf)
|
conf=conf)
|
||||||
nodes_json, err, code = ssh_node(ip=self.fuelip,
|
nodes_json, err, code = tools.ssh_node(ip=self.fuelip,
|
||||||
command=fuel_node_cmd,
|
command=fuel_node_cmd,
|
||||||
ssh_opts=fuelnode.ssh_opts,
|
ssh_opts=fuelnode.ssh_opts,
|
||||||
env_vars="",
|
env_vars="",
|
||||||
@ -410,17 +350,20 @@ class Nodes(object):
|
|||||||
|
|
||||||
def pass_hard_filter(self, node):
|
def pass_hard_filter(self, node):
|
||||||
if self.conf.hard_filter:
|
if self.conf.hard_filter:
|
||||||
if self.conf.hard_filter.status and (node.status not in self.conf.hard_filter.status):
|
if (self.conf.hard_filter.status and
|
||||||
logging.info("hard filter by status: excluding node-%s" % node.node_id)
|
(node.status not in self.conf.hard_filter.status)):
|
||||||
|
logging.info("hard filter by status: excluding node-%s" %
|
||||||
|
node.node_id)
|
||||||
return False
|
return False
|
||||||
if (isinstance(self.conf.hard_filter.online, bool) and
|
if (isinstance(self.conf.hard_filter.online, bool) and
|
||||||
(bool(node.online) != bool(self.conf.hard_filter.online))):
|
(bool(node.online) != self.conf.hard_filter.online)):
|
||||||
logging.info("hard filter by online: excluding node-%s" % node.node_id)
|
logging.info("hard filter by online: excluding node-%s" %
|
||||||
|
node.node_id)
|
||||||
return False
|
return False
|
||||||
if (self.conf.hard_filter.node_ids and
|
if (self.conf.hard_filter.node_ids and
|
||||||
((int(node.node_id) not in self.conf.hard_filter.node_ids) and
|
(int(node.node_id) not in self.conf.hard_filter.node_ids)):
|
||||||
(str(node.node_id) not in self.conf.hard_filter.node_ids))):
|
logging.info("hard filter by ids: excluding node-%s" %
|
||||||
logging.info("hard filter by ids: excluding node-%s" % node.node_id)
|
node.node_id)
|
||||||
return False
|
return False
|
||||||
if self.conf.hard_filter.roles:
|
if self.conf.hard_filter.roles:
|
||||||
ok_roles = []
|
ok_roles = []
|
||||||
@ -428,7 +371,8 @@ class Nodes(object):
|
|||||||
if role in self.conf.hard_filter.roles:
|
if role in self.conf.hard_filter.roles:
|
||||||
ok_roles.append(role)
|
ok_roles.append(role)
|
||||||
if not ok_roles:
|
if not ok_roles:
|
||||||
logging.info("hard filter by roles: excluding node-%s" % node.node_id)
|
logging.info("hard filter by roles: excluding node-%s" %
|
||||||
|
node.node_id)
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -468,9 +412,8 @@ class Nodes(object):
|
|||||||
|
|
||||||
def get_version(self):
|
def get_version(self):
|
||||||
cmd = "awk -F ':' '/release/ {print \$2}' /etc/nailgun/version.yaml"
|
cmd = "awk -F ':' '/release/ {print \$2}' /etc/nailgun/version.yaml"
|
||||||
logging.info('get_version:%s' %self.conf.ssh_opts)
|
|
||||||
fuelnode = self.nodes[self.fuelip]
|
fuelnode = self.nodes[self.fuelip]
|
||||||
release, err, code = ssh_node(ip=fuelnode.ip,
|
release, err, code = tools.ssh_node(ip=fuelnode.ip,
|
||||||
command=cmd,
|
command=cmd,
|
||||||
ssh_opts=fuelnode.ssh_opts,
|
ssh_opts=fuelnode.ssh_opts,
|
||||||
env_vars="",
|
env_vars="",
|
||||||
@ -485,23 +428,23 @@ class Nodes(object):
|
|||||||
def get_release(self):
|
def get_release(self):
|
||||||
cmd = "awk -F ':' '/fuel_version/ {print \$2}' /etc/astute.yaml"
|
cmd = "awk -F ':' '/fuel_version/ {print \$2}' /etc/astute.yaml"
|
||||||
for node in self.nodes.values():
|
for node in self.nodes.values():
|
||||||
# skip master
|
|
||||||
if node.node_id == 0:
|
if node.node_id == 0:
|
||||||
|
# skip master
|
||||||
node.release = self.version
|
node.release = self.version
|
||||||
if (node.node_id != 0) and (node.status == 'ready'):
|
if (node.node_id != 0) and (node.status == 'ready'):
|
||||||
release, err, code = ssh_node(ip=node.ip,
|
release, err, code = tools.ssh_node(ip=node.ip,
|
||||||
command=cmd,
|
command=cmd,
|
||||||
sshopts=self.sshopts,
|
ssh_opts=node.sshopts,
|
||||||
sshvars='',
|
timeout=node.timeout)
|
||||||
timeout=self.timeout,
|
|
||||||
filename=None)
|
|
||||||
if code != 0:
|
if code != 0:
|
||||||
logging.warning("get_release: node: %s: Can't get node release" %
|
logging.warning("get_release: node: %s: %s" %
|
||||||
(node.node_id))
|
(node.node_id, "Can't get node release"))
|
||||||
node.release = self.version
|
node.release = None
|
||||||
continue
|
continue
|
||||||
|
else:
|
||||||
node.release = release.strip('\n "\'')
|
node.release = release.strip('\n "\'')
|
||||||
logging.info("get_release: node: %s, release: %s" % (node.node_id, node.release))
|
logging.info("get_release: node: %s, release: %s" %
|
||||||
|
(node.node_id, node.release))
|
||||||
|
|
||||||
def get_node_file_list(self):
|
def get_node_file_list(self):
|
||||||
for key in self.files.keys():
|
for key in self.files.keys():
|
||||||
@ -531,71 +474,61 @@ class Nodes(object):
|
|||||||
for node in self.nodes.values():
|
for node in self.nodes.values():
|
||||||
logging.debug('%s' % node.files[ckey])
|
logging.debug('%s' % node.files[ckey])
|
||||||
|
|
||||||
|
def exec_filter(self, node):
|
||||||
|
f = self.conf.soft_filter
|
||||||
|
if f:
|
||||||
|
result = (((not f.status) or (node.status in f.status)) and
|
||||||
|
((not f.roles) or (node.role in f.roles)) and
|
||||||
|
((not f.node_ids) or (node.node_id in f.node_ids)))
|
||||||
|
else:
|
||||||
|
result = True
|
||||||
|
return result and (((self.cluster and node.cluster != 0 and
|
||||||
|
str(self.cluster) == str(node.cluster)) or not
|
||||||
|
self.cluster) and node.online)
|
||||||
|
|
||||||
def launch_ssh(self, odir='info', timeout=15, fake=False):
|
def launch_ssh(self, odir='info', timeout=15, fake=False):
|
||||||
lock = flock.FLock('/tmp/timmy-cmds.lock')
|
lock = flock.FLock('/tmp/timmy-cmds.lock')
|
||||||
if not lock.lock():
|
if not lock.lock():
|
||||||
logging.warning('Unable to obtain lock, skipping "cmds"-part')
|
logging.warning('Unable to obtain lock, skipping "cmds"-part')
|
||||||
return ''
|
return ''
|
||||||
|
try:
|
||||||
label = ckey
|
label = ckey
|
||||||
threads = []
|
run_items = []
|
||||||
sem = threading.BoundedSemaphore(value=100)
|
for n in [n for n in self.nodes.values() if self.exec_filter(n)]:
|
||||||
for node in self.nodes.values():
|
run_items.append(tools.RunItem(target=n.exec_cmd,
|
||||||
if (self.cluster and str(self.cluster) != str(node.cluster) and
|
args={'label': label,
|
||||||
node.cluster != 0):
|
'odir': odir,
|
||||||
continue
|
'fake': fake}))
|
||||||
if node.status in self.conf.soft_filter.status and node.online:
|
tools.run_batch(run_items, 100)
|
||||||
sem.acquire(True)
|
finally:
|
||||||
t = threading.Thread(target=semaphore_release,
|
|
||||||
args=(sem,
|
|
||||||
node.exec_cmd,
|
|
||||||
node.node_id,
|
|
||||||
[label,
|
|
||||||
odir,
|
|
||||||
fake]))
|
|
||||||
threads.append(t)
|
|
||||||
t.start()
|
|
||||||
for t in threads:
|
|
||||||
t.join()
|
|
||||||
lock.unlock()
|
lock.unlock()
|
||||||
|
|
||||||
def filter_logs(self):
|
|
||||||
for node in self.nodes.values():
|
|
||||||
if (self.cluster and str(self.cluster) != str(node.cluster) and
|
|
||||||
node.cluster != 0):
|
|
||||||
continue
|
|
||||||
if node.status in self.conf.soft_filter.status and node.online:
|
|
||||||
node.logs_filter(self.conf.log_files['filter'])
|
|
||||||
logging.debug('filter logs: node-%s: filtered logs: %s' %
|
|
||||||
(node.node_id, node.flogs))
|
|
||||||
|
|
||||||
def calculate_log_size(self, timeout=15):
|
def calculate_log_size(self, timeout=15):
|
||||||
lsize = 0
|
total_size = 0
|
||||||
for node in self.nodes.values():
|
for node in [n for n in self.nodes.values() if self.exec_filter(n)]:
|
||||||
if (self.cluster and str(self.cluster) != str(node.cluster) and
|
if not node.logs_populate(5):
|
||||||
node.cluster != 0):
|
logging.warning("can't get log file list from node %s" %
|
||||||
continue
|
node.node_id)
|
||||||
if node.status in self.conf.soft_filter.status and node.online:
|
else:
|
||||||
if not node.log_size_from_find(self.conf.log_files['path'],5):
|
node.logs_filter()
|
||||||
logging.warning("can't get log file list from node %s" % node.node_id)
|
logging.debug('filter logs: node-%s: filtered logs: %s' %
|
||||||
self.filter_logs()
|
(node.node_id, node.logs))
|
||||||
for node in self.nodes.values():
|
total_size += sum(node.logs.values())
|
||||||
for f in node.flogs:
|
logging.info('Full log size on nodes(with fuel): %s bytes' %
|
||||||
lsize += node.flogs[f]
|
total_size)
|
||||||
for fl in sorted(node.flogs.items(), key=lambda x: x[1]):
|
self.alogsize = total_size / 1024
|
||||||
logging.debug(fl)
|
|
||||||
logging.info('Full log size on nodes(with fuel): %s bytes' % lsize)
|
|
||||||
self.alogsize = lsize / 1024
|
|
||||||
|
|
||||||
def is_enough_space(self, directory, coefficient=1.2):
|
def is_enough_space(self, directory, coefficient=1.2):
|
||||||
mdir(directory)
|
tools.mdir(directory)
|
||||||
outs, errs, code = free_space(directory, timeout=1)
|
outs, errs, code = tools.free_space(directory, timeout=1)
|
||||||
if code != 0:
|
if code != 0:
|
||||||
logging.error("Can't get free space: %s" % errs)
|
logging.error("Can't get free space: %s" % errs)
|
||||||
return False
|
return False
|
||||||
try:
|
try:
|
||||||
fs = int(outs.rstrip('\n'))
|
fs = int(outs.rstrip('\n'))
|
||||||
except:
|
except:
|
||||||
logging.error("is_enough_space: can't get free space\nouts: %s" % outs)
|
logging.error("is_enough_space: can't get free space\nouts: %s" %
|
||||||
|
outs)
|
||||||
return False
|
return False
|
||||||
logging.info('logsize: %s Kb, free space: %s Kb' % (self.alogsize, fs))
|
logging.info('logsize: %s Kb, free space: %s Kb' % (self.alogsize, fs))
|
||||||
if (self.alogsize*coefficient > fs):
|
if (self.alogsize*coefficient > fs):
|
||||||
@ -606,9 +539,9 @@ class Nodes(object):
|
|||||||
|
|
||||||
def create_archive_general(self, directory, outfile, timeout):
|
def create_archive_general(self, directory, outfile, timeout):
|
||||||
cmd = "tar jcf '%s' -C %s %s" % (outfile, directory, ".")
|
cmd = "tar jcf '%s' -C %s %s" % (outfile, directory, ".")
|
||||||
mdir(self.conf.archives)
|
tools.mdir(self.conf.archives)
|
||||||
logging.debug("create_archive_general: cmd: %s" % cmd)
|
logging.debug("create_archive_general: cmd: %s" % cmd)
|
||||||
outs, errs, code = launch_cmd(command=cmd,
|
outs, errs, code = tools.launch_cmd(command=cmd,
|
||||||
timeout=timeout)
|
timeout=timeout)
|
||||||
if code != 0:
|
if code != 0:
|
||||||
logging.error("Can't create archive %s" % (errs))
|
logging.error("Can't create archive %s" % (errs))
|
||||||
@ -617,10 +550,11 @@ class Nodes(object):
|
|||||||
'''Returns interface speed through which logs will be dowloaded'''
|
'''Returns interface speed through which logs will be dowloaded'''
|
||||||
for node in self.nodes.values():
|
for node in self.nodes.values():
|
||||||
if not (node.ip == 'localhost' or node.ip.startswith('127.')):
|
if not (node.ip == 'localhost' or node.ip.startswith('127.')):
|
||||||
cmd = "cat /sys/class/net/$(/sbin/ip -o route get %s | cut -d' ' -f3)/speed" % node.ip
|
cmd = ("%s$(/sbin/ip -o route get %s | cut -d' ' -f3)/speed" %
|
||||||
out, err, code = launch_cmd(cmd, node.timeout)
|
('cat /sys/class/net/', node.ip))
|
||||||
|
out, err, code = tools.launch_cmd(cmd, node.timeout)
|
||||||
if code != 0:
|
if code != 0:
|
||||||
logging.error("can't get interface speed: error message: %s" % err)
|
logging.error("can't get interface speed: error: %s" % err)
|
||||||
return defspeed
|
return defspeed
|
||||||
try:
|
try:
|
||||||
speed = int(out)
|
speed = int(out)
|
||||||
@ -628,105 +562,65 @@ class Nodes(object):
|
|||||||
speed = defspeed
|
speed = defspeed
|
||||||
return speed
|
return speed
|
||||||
|
|
||||||
def create_log_archives(self, outdir, timeout, fake=False, maxthreads=10, speed=100):
|
def archive_logs(self, outdir, timeout,
|
||||||
|
fake=False, maxthreads=10, speed=100):
|
||||||
if fake:
|
if fake:
|
||||||
logging.info('create_log_archives: skip creating archives(fake:%s)' % fake)
|
logging.info('archive_logs:skip creating archives(fake:%s)' % fake)
|
||||||
return
|
return
|
||||||
threads = []
|
|
||||||
txtfl = []
|
txtfl = []
|
||||||
speed = self.find_adm_interface_speed(speed)
|
speed = self.find_adm_interface_speed(speed)
|
||||||
if len(self.nodes) > maxthreads:
|
speed = int(speed * 0.9 / min(maxthreads, len(self.nodes)))
|
||||||
speed = int(speed * 0.9 / maxthreads)
|
pythonslowpipe = tools.slowpipe % speed
|
||||||
else:
|
run_items = []
|
||||||
speed = int(speed * 0.9 / len(self.nodes))
|
for node in [n for n in self.nodes.values() if self.exec_filter(n)]:
|
||||||
pythonslowpipe = 'import sys\n'
|
|
||||||
pythonslowpipe += 'import time\n'
|
|
||||||
pythonslowpipe += 'while 1:\n'
|
|
||||||
pythonslowpipe += ' a = sys.stdin.read(int(1250*%s))\n' % speed
|
|
||||||
pythonslowpipe += ' if a:\n'
|
|
||||||
pythonslowpipe += ' sys.stdout.write(a)\n'
|
|
||||||
pythonslowpipe += ' time.sleep(0.01)\n'
|
|
||||||
pythonslowpipe += ' else:\n'
|
|
||||||
pythonslowpipe += ' break\n'
|
|
||||||
sem = threading.BoundedSemaphore(value=maxthreads)
|
|
||||||
for node in self.nodes.values():
|
|
||||||
if (self.cluster and str(self.cluster) != str(node.cluster) and
|
|
||||||
node.cluster != 0):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if node.status in self.conf.soft_filter.status and node.online:
|
|
||||||
sem.acquire(True)
|
|
||||||
node.archivelogsfile = os.path.join(outdir,
|
node.archivelogsfile = os.path.join(outdir,
|
||||||
'logs-node-'+str(node.node_id) + '.tar.gz')
|
'logs-node-%s.tar.gz' %
|
||||||
mdir(outdir)
|
str(node.node_id))
|
||||||
|
tools.mdir(outdir)
|
||||||
logslistfile = node.archivelogsfile + '.txt'
|
logslistfile = node.archivelogsfile + '.txt'
|
||||||
txtfl.append(logslistfile)
|
txtfl.append(logslistfile)
|
||||||
try:
|
try:
|
||||||
with open(logslistfile, 'w') as llf:
|
with open(logslistfile, 'w') as llf:
|
||||||
for line in node.flogs:
|
for filename in node.logs:
|
||||||
llf.write(line+"\0")
|
llf.write(filename+"\0")
|
||||||
except:
|
except:
|
||||||
logging.error("create_archive_logs: Can't write to file %s" % logslistfile)
|
logging.error("create_archive_logs: Can't write to file %s" %
|
||||||
|
logslistfile)
|
||||||
continue
|
continue
|
||||||
if node.ip == 'localhost' or node.ip.startswith('127.'):
|
|
||||||
cmd = "tar --gzip --create --file - --null --files-from -"
|
cmd = "tar --gzip --create --file - --null --files-from -"
|
||||||
else:
|
if not (node.ip == 'localhost' or node.ip.startswith('127.')):
|
||||||
cmd = "tar --gzip --create --file - --null --files-from - | python -c '%s'" % pythonslowpipe
|
cmd = ' '.join([cmd, "| python -c '%s'" % pythonslowpipe])
|
||||||
t = threading.Thread(target=semaphore_release,
|
args = {'cmd': cmd,
|
||||||
args=(sem,
|
'infile': logslistfile,
|
||||||
node.exec_simple_cmd,
|
'outfile': node.archivelogsfile,
|
||||||
node.node_id,
|
'timeout': timeout}
|
||||||
[cmd,
|
run_items.append(tools.RunItem(target=node.exec_simple_cmd,
|
||||||
logslistfile,
|
args=args))
|
||||||
node.archivelogsfile,
|
tools.run_batch(run_items, maxthreads)
|
||||||
timeout]
|
|
||||||
)
|
|
||||||
)
|
|
||||||
threads.append(t)
|
|
||||||
t.start()
|
|
||||||
while True:
|
|
||||||
try:
|
|
||||||
tt = []
|
|
||||||
for t in threads:
|
|
||||||
if t is not None and t.isAlive():
|
|
||||||
t.join(1)
|
|
||||||
else:
|
|
||||||
tt.append(t)
|
|
||||||
if len(threads) == len(tt):
|
|
||||||
break
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
#sys.exit(9)
|
|
||||||
killall_children(self.timeout)
|
|
||||||
raise KeyboardInterrupt()
|
|
||||||
|
|
||||||
for tfile in txtfl:
|
for tfile in txtfl:
|
||||||
try:
|
try:
|
||||||
os.remove(tfile)
|
os.remove(tfile)
|
||||||
except:
|
except:
|
||||||
logging.error("create_log_archives: can't delete file %s" % tfile)
|
logging.error("archive_logs: can't delete file %s" % tfile)
|
||||||
|
|
||||||
def get_conf_files(self, odir=fkey, timeout=15):
|
def get_conf_files(self, odir=fkey, timeout=15):
|
||||||
if fkey not in self.files:
|
if fkey not in self.files:
|
||||||
logging.warning("get_conf_files: %s directory does not exist" % fkey)
|
logging.warning("get_conf_files: %s directory doesn't exist" %
|
||||||
|
fkey)
|
||||||
return
|
return
|
||||||
lock = flock.FLock('/tmp/timmy-files.lock')
|
lock = flock.FLock('/tmp/timmy-files.lock')
|
||||||
if not lock.lock():
|
if not lock.lock():
|
||||||
logging.warning('Unable to obtain lock, skipping "files"-part')
|
logging.warning('Unable to obtain lock, skipping "files"-part')
|
||||||
return ''
|
return ''
|
||||||
|
try:
|
||||||
label = fkey
|
label = fkey
|
||||||
threads = []
|
run_items = []
|
||||||
for node in self.nodes.values():
|
for n in [n for n in self.nodes.values() if self.exec_filter(n)]:
|
||||||
if (self.cluster and str(self.cluster) != str(node.cluster) and
|
run_items.append(tools.RunItem(target=n.get_files,
|
||||||
node.cluster != 0):
|
args={'label': label,
|
||||||
continue
|
'odir': odir}))
|
||||||
if node.status in self.conf.soft_filter.status and node.online:
|
tools.run_batch(run_items, 10)
|
||||||
t = threading.Thread(target=node.get_files,
|
finally:
|
||||||
args=(label,
|
|
||||||
odir,))
|
|
||||||
threads.append(t)
|
|
||||||
t.start()
|
|
||||||
for t in threads:
|
|
||||||
t.join()
|
|
||||||
lock.unlock()
|
lock.unlock()
|
||||||
|
|
||||||
|
|
||||||
|
136
timmy/tools.py
136
timmy/tools.py
@ -22,37 +22,73 @@ tools module
|
|||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
import threading
|
||||||
|
import multiprocessing
|
||||||
def import_subprocess():
|
|
||||||
if 'subprocess' not in globals():
|
|
||||||
global subprocess
|
|
||||||
global ok_python
|
|
||||||
try:
|
|
||||||
import subprocess32 as subprocess
|
|
||||||
logging.info("using improved subprocess32 module\n")
|
|
||||||
ok_python = True
|
|
||||||
except:
|
|
||||||
import subprocess
|
import subprocess
|
||||||
logging.warning(("Please upgrade the module 'subprocess' to the latest version: "
|
|
||||||
"https://pypi.python.org/pypi/subprocess32/"))
|
|
||||||
ok_python = True
|
|
||||||
if sys.version_info > (2, 7, 0):
|
|
||||||
ok_python = False
|
|
||||||
logging.warning('this subprocess module does not support timeouts')
|
|
||||||
else:
|
|
||||||
logging.info('subprocess is already loaded')
|
|
||||||
|
|
||||||
def semaphore_release(sema, func, node_id, params):
|
|
||||||
logging.info('start ssh node: %s' % node_id)
|
slowpipe = '''
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
while 1:
|
||||||
|
a = sys.stdin.read(int(1250*%s))
|
||||||
|
if a:
|
||||||
|
sys.stdout.write(a)
|
||||||
|
time.sleep(0.01)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
'''
|
||||||
|
|
||||||
|
|
||||||
|
def interrupt_wrapper(f):
|
||||||
|
def wrapper(*args, **kwargs):
|
||||||
try:
|
try:
|
||||||
result = func(*params)
|
f(*args, **kwargs)
|
||||||
except:
|
except KeyboardInterrupt:
|
||||||
logging.error("failed to launch: %s on node %s" % node_id)
|
logging.warning('Interrupted, exiting.')
|
||||||
|
return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
class RunItem():
|
||||||
|
def __init__(self, target, args):
|
||||||
|
self.target = target
|
||||||
|
self.args = args
|
||||||
|
self.process = None
|
||||||
|
|
||||||
|
|
||||||
|
class SemaphoreProcess(multiprocessing.Process):
|
||||||
|
def __init__(self, semaphore, target, args):
|
||||||
|
multiprocessing.Process.__init__(self)
|
||||||
|
self.semaphore = semaphore
|
||||||
|
self.target = target
|
||||||
|
self.args = args
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
try:
|
||||||
|
self.target(**self.args)
|
||||||
finally:
|
finally:
|
||||||
sema.release()
|
logging.debug('finished call: %s' % self.target)
|
||||||
logging.info('finish ssh node: %s' % node_id)
|
self.semaphore.release()
|
||||||
return result
|
|
||||||
|
|
||||||
|
def run_batch(item_list, maxthreads):
|
||||||
|
semaphore = multiprocessing.BoundedSemaphore(maxthreads)
|
||||||
|
try:
|
||||||
|
for run_item in item_list:
|
||||||
|
semaphore.acquire(True)
|
||||||
|
p = SemaphoreProcess(target=run_item.target,
|
||||||
|
semaphore=semaphore,
|
||||||
|
args=run_item.args)
|
||||||
|
run_item.process = p
|
||||||
|
p.start()
|
||||||
|
for run_item in item_list:
|
||||||
|
run_item.process.join()
|
||||||
|
run_item.process = None
|
||||||
|
except KeyboardInterrupt:
|
||||||
|
for run_item in item_list:
|
||||||
|
if run_item.process:
|
||||||
|
run_item.process.terminate()
|
||||||
|
raise KeyboardInterrupt()
|
||||||
|
|
||||||
|
|
||||||
def get_dir_structure(rootdir):
|
def get_dir_structure(rootdir):
|
||||||
@ -85,27 +121,34 @@ def mdir(directory):
|
|||||||
|
|
||||||
|
|
||||||
def launch_cmd(command, timeout):
|
def launch_cmd(command, timeout):
|
||||||
|
def _timeout_terminate(pid):
|
||||||
|
try:
|
||||||
|
os.kill(pid, 15)
|
||||||
|
logging.error("launch_cmd: pid %d killed by timeout" % pid)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
logging.info('launch_cmd: command %s' % command)
|
logging.info('launch_cmd: command %s' % command)
|
||||||
p = subprocess.Popen(command,
|
p = subprocess.Popen(command,
|
||||||
shell=True,
|
shell=True,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE)
|
||||||
if ok_python:
|
timeout_killer = None
|
||||||
try:
|
|
||||||
outs, errs = p.communicate(timeout=timeout+1)
|
|
||||||
except subprocess.TimeoutExpired:
|
|
||||||
p.kill()
|
|
||||||
outs, errs = p.communicate()
|
|
||||||
logging.error("command: %s err: %s, returned: %s" %
|
|
||||||
(command, errs, p.returncode))
|
|
||||||
else:
|
|
||||||
try:
|
try:
|
||||||
|
timeout_killer = threading.Timer(timeout, _timeout_terminate, [p.pid])
|
||||||
|
timeout_killer.start()
|
||||||
outs, errs = p.communicate()
|
outs, errs = p.communicate()
|
||||||
except:
|
except:
|
||||||
|
try:
|
||||||
p.kill()
|
p.kill()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
outs, errs = p.communicate()
|
outs, errs = p.communicate()
|
||||||
logging.error("command: %s err: %s, returned: %s" %
|
logging.error("command: %s err: %s, returned: %s" %
|
||||||
(command, errs, p.returncode))
|
(command, errs, p.returncode))
|
||||||
|
finally:
|
||||||
|
if timeout_killer:
|
||||||
|
timeout_killer.cancel()
|
||||||
logging.debug("ssh return: err:%s\nouts:%s\ncode:%s" %
|
logging.debug("ssh return: err:%s\nouts:%s\ncode:%s" %
|
||||||
(errs, outs, p.returncode))
|
(errs, outs, p.returncode))
|
||||||
logging.info("ssh return: err:%s\ncode:%s" %
|
logging.info("ssh return: err:%s\ncode:%s" %
|
||||||
@ -115,8 +158,10 @@ def launch_cmd(command, timeout):
|
|||||||
|
|
||||||
def ssh_node(ip, command, ssh_opts=[], env_vars=[], timeout=15, filename=None,
|
def ssh_node(ip, command, ssh_opts=[], env_vars=[], timeout=15, filename=None,
|
||||||
inputfile=None, outputfile=None, prefix='nice -n 19 ionice -c 3'):
|
inputfile=None, outputfile=None, prefix='nice -n 19 ionice -c 3'):
|
||||||
#ssh_opts = " ".join(ssh_opts)
|
if type(ssh_opts) is list:
|
||||||
#env_vars = " ".join(env_vars)
|
ssh_opts = ' '.join(ssh_opts)
|
||||||
|
if type(env_vars) is list:
|
||||||
|
env_vars = ' '.join(env_vars)
|
||||||
if (ip in ['localhost', '127.0.0.1']) or ip.startswith('127.'):
|
if (ip in ['localhost', '127.0.0.1']) or ip.startswith('127.'):
|
||||||
logging.info("skip ssh")
|
logging.info("skip ssh")
|
||||||
bstr = "%s timeout '%s' bash -c " % (
|
bstr = "%s timeout '%s' bash -c " % (
|
||||||
@ -135,9 +180,12 @@ def ssh_node(ip, command, ssh_opts=[], env_vars=[], timeout=15, filename=None,
|
|||||||
logging.info("ssh_node: inputfile selected, cmd: %s" % cmd)
|
logging.info("ssh_node: inputfile selected, cmd: %s" % cmd)
|
||||||
if outputfile is not None:
|
if outputfile is not None:
|
||||||
cmd += ' > "' + outputfile + '"'
|
cmd += ' > "' + outputfile + '"'
|
||||||
|
cmd = ("trap 'kill $pid' 15; " +
|
||||||
|
"trap 'kill $pid' 2; " + cmd + '&:; pid=$!; wait $!')
|
||||||
outs, errs, code = launch_cmd(cmd, timeout)
|
outs, errs, code = launch_cmd(cmd, timeout)
|
||||||
return outs, errs, code
|
return outs, errs, code
|
||||||
|
|
||||||
|
|
||||||
def killall_children(timeout):
|
def killall_children(timeout):
|
||||||
cmd = 'ps -o pid --ppid %d --noheaders' % os.getpid()
|
cmd = 'ps -o pid --ppid %d --noheaders' % os.getpid()
|
||||||
out, errs, code = launch_cmd(cmd, timeout)
|
out, errs, code = launch_cmd(cmd, timeout)
|
||||||
@ -166,7 +214,10 @@ def killall_children(timeout):
|
|||||||
except:
|
except:
|
||||||
logging.warning('could not kill %s' % p)
|
logging.warning('could not kill %s' % p)
|
||||||
|
|
||||||
|
|
||||||
def get_files_rsync(ip, data, ssh_opts, dpath, timeout=15):
|
def get_files_rsync(ip, data, ssh_opts, dpath, timeout=15):
|
||||||
|
if type(ssh_opts) is list:
|
||||||
|
ssh_opts = ' '.join(ssh_opts)
|
||||||
if (ip in ['localhost', '127.0.0.1']) or ip.startswith('127.'):
|
if (ip in ['localhost', '127.0.0.1']) or ip.startswith('127.'):
|
||||||
logging.info("skip ssh rsync")
|
logging.info("skip ssh rsync")
|
||||||
cmd = ("timeout '%s' rsync -avzr --files-from=- / '%s'"
|
cmd = ("timeout '%s' rsync -avzr --files-from=- / '%s'"
|
||||||
@ -185,15 +236,6 @@ def get_files_rsync(ip, data, ssh_opts, dpath, timeout=15):
|
|||||||
stdin=subprocess.PIPE,
|
stdin=subprocess.PIPE,
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE)
|
stderr=subprocess.PIPE)
|
||||||
if ok_python:
|
|
||||||
try:
|
|
||||||
outs, errs = p.communicate(input=data, timeout=timeout+1)
|
|
||||||
except subprocess.TimeoutExpired:
|
|
||||||
p.kill()
|
|
||||||
outs, errs = p.communicate()
|
|
||||||
logging.error("ip: %s, command: %s err: %s, returned: %s" %
|
|
||||||
(ip, cmd, errs, p.returncode))
|
|
||||||
else:
|
|
||||||
try:
|
try:
|
||||||
outs, errs = p.communicate(input=data)
|
outs, errs = p.communicate(input=data)
|
||||||
except:
|
except:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user