added: logs collector from nodes
This commit is contained in:
parent
4c34b498cf
commit
926a824023
1
conf.py
1
conf.py
@ -17,6 +17,7 @@ class Conf(object):
|
|||||||
rqdir = './rq'
|
rqdir = './rq'
|
||||||
logdir = './info'
|
logdir = './info'
|
||||||
compress_timeout = 3600
|
compress_timeout = 3600
|
||||||
|
archives = '/tmp/timmy/archives'
|
||||||
find = {'template': "-name '*.gz' -o -name '*.log' -o -name '*-[0-9]4'",
|
find = {'template': "-name '*.gz' -o -name '*.log' -o -name '*-[0-9]4'",
|
||||||
'path': '/var/log/'}
|
'path': '/var/log/'}
|
||||||
|
|
||||||
|
@ -11,11 +11,12 @@ timeout: 15
|
|||||||
find:
|
find:
|
||||||
template: -name '*.log'
|
template: -name '*.log'
|
||||||
log_files:
|
log_files:
|
||||||
default: -name '*.log'
|
path: /var/log
|
||||||
by-role:
|
default: -name '*.log'
|
||||||
|
by_role:
|
||||||
compute: -name '*.log'
|
compute: -name '*.log'
|
||||||
controller: -name '*.log' -name '*.log' -o -name '*.log.1' -o -name '*' ! -path '/var/log/atop*'
|
controller: -name '*.log' -name '*.log' -o -name '*.log.1' -o -name '*' ! -path '/var/log/atop*'
|
||||||
mongo: -name '*.log'
|
mongo: -name '*.log'
|
||||||
ceph: -name '*.log'
|
ceph: -name '*.log'
|
||||||
by-node-id:
|
by_node_id:
|
||||||
0: -name '*.log' -o -name '*.log.1'
|
0: -name '*.log' -o -name '*.log.1'
|
||||||
|
88
nodes.py
88
nodes.py
@ -134,6 +134,20 @@ class Node(object):
|
|||||||
except:
|
except:
|
||||||
logging.error("Can't write to file %s" % dfile)
|
logging.error("Can't write to file %s" % dfile)
|
||||||
|
|
||||||
|
def exec_simple_cmd(self, cmd, outfile, sshvars, sshopts, timeout=15, fake=False):
    """Run a single command on this node through ``ssh_node``.

    :param cmd: command line handed to ``ssh_node`` as ``command``
    :param outfile: handed to ``ssh_node`` as ``outputfile``
        (presumably the local file that receives the command output --
        confirm against ``ssh_node``)
    :param sshvars: handed to ``ssh_node`` unchanged
    :param sshopts: handed to ``ssh_node`` unchanged
    :param timeout: handed to ``ssh_node`` unchanged
    :param fake: when true, the command is only logged and never executed

    Returns ``None``. A non-zero exit code is reported with a warning
    and is not raised.
    """
    logging.info('node:%s(%s), exec: %s', self.node_id, self.ip, cmd)
    if fake:
        return
    _, errs, code = ssh_node(ip=self.ip,
                             command=cmd,
                             sshvars=sshvars,
                             sshopts=sshopts,
                             timeout=timeout,
                             outputfile=outfile)
    if code != 0:
        # What is being reported here is the command itself, not a command
        # file, so the label says "cmd".
        logging.warning("node: %s, ip: %s, cmd: %s,"
                        " code: %s, error message: %s",
                        self.node_id, self.ip, cmd, code, errs)
|
||||||
|
|
||||||
def du_logs(self, label, sshopts, odir='info', timeout=15):
|
def du_logs(self, label, sshopts, odir='info', timeout=15):
|
||||||
logging.info('node:%s(%s), filelist: %s' %
|
logging.info('node:%s(%s), filelist: %s' %
|
||||||
(self.node_id, self.ip, label))
|
(self.node_id, self.ip, label))
|
||||||
@ -197,7 +211,7 @@ class Node(object):
|
|||||||
logging.info('template find: %s' % template)
|
logging.info('template find: %s' % template)
|
||||||
cmd = ("find '%s' -type f \( %s \) -exec du -b {} +" %
|
cmd = ("find '%s' -type f \( %s \) -exec du -b {} +" %
|
||||||
(varlogdir, str(template)))
|
(varlogdir, str(template)))
|
||||||
logging.info('node: %s, logs du-cmd: %s' % (self.node_id, cmd))
|
logging.info('log_size_from_find: node: %s, logs du-cmd: %s' % (self.node_id, cmd))
|
||||||
outs, errs, code = ssh_node(ip=self.ip,
|
outs, errs, code = ssh_node(ip=self.ip,
|
||||||
command=cmd,
|
command=cmd,
|
||||||
sshopts=sshopts,
|
sshopts=sshopts,
|
||||||
@ -426,7 +440,7 @@ class Nodes(object):
|
|||||||
if (self.cluster and str(self.cluster) != str(node.cluster) and
|
if (self.cluster and str(self.cluster) != str(node.cluster) and
|
||||||
node.cluster != 0):
|
node.cluster != 0):
|
||||||
continue
|
continue
|
||||||
if node.status in self.conf.soft_filter.status and node.online:
|
if node.status in self.conf.soft_filter.status and node.online and node.fltemplate:
|
||||||
t = threading.Thread(target=node.du_logs,
|
t = threading.Thread(target=node.du_logs,
|
||||||
args=(label,
|
args=(label,
|
||||||
self.sshopts,
|
self.sshopts,
|
||||||
@ -439,11 +453,16 @@ class Nodes(object):
|
|||||||
for node in self.nodes.values():
|
for node in self.nodes.values():
|
||||||
lsize += node.logsize
|
lsize += node.logsize
|
||||||
logging.info('Full log size on nodes: %s bytes' % lsize)
|
logging.info('Full log size on nodes: %s bytes' % lsize)
|
||||||
fuelnode = self.nodes[self.fuelip]
|
#fuelnode = self.nodes[self.fuelip]
|
||||||
if fuelnode.log_size_from_find(template,
|
#if fuelnode.log_size_from_find(template,
|
||||||
|
# self.sshopts,
|
||||||
|
# 5) > 0:
|
||||||
|
# lsize += fuelnode.logsize
|
||||||
|
for node in self.nodes.values():
|
||||||
|
if node.fltemplate and node.log_size_from_find(node.fltemplate,
|
||||||
self.sshopts,
|
self.sshopts,
|
||||||
5) > 0:
|
5) > 0:
|
||||||
lsize += fuelnode.logsize
|
lsize += node.logsize
|
||||||
logging.info('Full log size on nodes(with fuel): %s bytes' % lsize)
|
logging.info('Full log size on nodes(with fuel): %s bytes' % lsize)
|
||||||
self.alogsize = lsize / 1024
|
self.alogsize = lsize / 1024
|
||||||
|
|
||||||
@ -472,21 +491,36 @@ class Nodes(object):
|
|||||||
if code != 0:
|
if code != 0:
|
||||||
logging.error("Can't create archive %s" % (errs))
|
logging.error("Can't create archive %s" % (errs))
|
||||||
|
|
||||||
def create_archive_logs(self, template, outfile, timeout):
|
def create_archive_logs(self, outdir, timeout):
|
||||||
fuelnode = self.nodes[self.fuelip]
|
#fuelnode = self.nodes[self.fuelip]
|
||||||
tstr = '--transform \\"flags=r;s|^|logs/fuel/|\\"'
|
threads = []
|
||||||
cmd = ("find %s -type f \( %s \) -print0 "
|
for node in self.nodes.values():
|
||||||
"| tar --create %s --file - "
|
if (self.cluster and str(self.cluster) != str(node.cluster) and
|
||||||
"--null --files-from -" %
|
node.cluster != 0):
|
||||||
(varlogdir, template, tstr))
|
continue
|
||||||
outs, errs, code = ssh_node(ip=fuelnode.ip,
|
|
||||||
command=cmd,
|
if node.status in self.conf.soft_filter.status and node.online and node.fltemplate:
|
||||||
sshopts=self.sshopts,
|
tstr = ''
|
||||||
sshvars='',
|
cl = 'cluster-%s' % self.cluster
|
||||||
timeout=timeout,
|
node.archivelogsfile = os.path.join(outdir, 'node-'+str(node.node_id) + '.tar')
|
||||||
outputfile=outfile)
|
mdir(outdir)
|
||||||
if code != 0:
|
if str(node.node_id) == '0':
|
||||||
logging.warning("stderr from tar: %s" % (errs))
|
tstr = '--transform \\"flags=r;s|^|logs/fuel/|\\"'
|
||||||
|
cmd = ("find %s -type f \( %s \) -print0 "
|
||||||
|
"| tar --create %s --file - "
|
||||||
|
"--null --files-from -" %
|
||||||
|
(node.flpath, node.fltemplate, tstr))
|
||||||
|
t = threading.Thread(target=node.exec_simple_cmd,
|
||||||
|
args=(cmd,
|
||||||
|
node.archivelogsfile,
|
||||||
|
self.sshvars,
|
||||||
|
self.sshopts,
|
||||||
|
timeout)
|
||||||
|
)
|
||||||
|
threads.append(t)
|
||||||
|
t.start()
|
||||||
|
for t in threads:
|
||||||
|
t.join()
|
||||||
|
|
||||||
def add_logs_archive(self, directory, key, outfile, timeout):
|
def add_logs_archive(self, directory, key, outfile, timeout):
|
||||||
cmd = ("tar --append --file=%s --directory %s %s" %
|
cmd = ("tar --append --file=%s --directory %s %s" %
|
||||||
@ -505,6 +539,20 @@ class Nodes(object):
|
|||||||
if code != 0:
|
if code != 0:
|
||||||
logging.warning("Can't compress archive %s" % (errs))
|
logging.warning("Can't compress archive %s" % (errs))
|
||||||
|
|
||||||
|
def set_template_for_find(self):
    """Choose, for every node, the log directory and ``find`` expression.

    Each node gets ``flpath`` from ``conf.log_files['path']`` and
    ``fltemplate`` from ``conf.log_files['default']``. The template is
    then overridden by the first of the node's roles that has an entry
    in ``by_role``, and finally by a ``by_node_id`` entry for the node,
    which takes precedence over any role match.
    """
    log_files = self.conf.log_files
    # Either override section may be falsy (the original code already
    # guarded ``by_node_id`` for that); treat both the same way instead
    # of crashing on ``None.keys()``.
    by_role = log_files['by_role'] or {}
    # Node ids are compared through str() elsewhere in this file, so
    # normalise the keys here as well: an id of 0 then matches a node
    # whose id is '0' and vice versa.
    by_node_id = dict((str(key), value)
                      for key, value in (log_files['by_node_id'] or {}).items())
    for node in self.nodes.values():
        node.flpath = log_files['path']
        node.fltemplate = log_files['default']
        for role in node.roles:
            if role in by_role:
                node.fltemplate = by_role[role]
                logging.info('set_template_for_find: break on role %s', role)
                break
        node_key = str(node.node_id)
        if node_key in by_node_id:
            node.fltemplate = by_node_id[node_key]
        logging.info('set_template_for_find: node: %s, template: %s',
                     node.node_id, node.fltemplate)
|
||||||
|
|
||||||
def get_conf_files(self, odir=fkey, timeout=15):
|
def get_conf_files(self, odir=fkey, timeout=15):
|
||||||
if fkey not in self.files:
|
if fkey not in self.files:
|
||||||
logging.warning("get_conf_files: %s directory does not exist" %(fkey))
|
logging.warning("get_conf_files: %s directory does not exist" %(fkey))
|
||||||
|
4
timmy.py
4
timmy.py
@ -77,11 +77,11 @@ def main(argv=None):
|
|||||||
logging.warning('Unable to obtain lock, skipping "logs"-part')
|
logging.warning('Unable to obtain lock, skipping "logs"-part')
|
||||||
return 1
|
return 1
|
||||||
n.get_node_file_list()
|
n.get_node_file_list()
|
||||||
|
n.set_template_for_find()
|
||||||
n.calculate_log_size(config.find['template'])
|
n.calculate_log_size(config.find['template'])
|
||||||
if n.is_enough_space():
|
if n.is_enough_space():
|
||||||
n.get_log_files(config.outdir)
|
n.get_log_files(config.outdir)
|
||||||
n.create_archive_logs(config.find['template'],
|
n.create_archive_logs(config.archives,
|
||||||
config.logs_archive,
|
|
||||||
config.compress_timeout)
|
config.compress_timeout)
|
||||||
n.add_logs_archive(config.outdir, nodes.lkey,
|
n.add_logs_archive(config.outdir, nodes.lkey,
|
||||||
config.logs_archive, 120)
|
config.logs_archive, 120)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user