stacktach/reports/requests.py
2013-02-11 15:10:34 -06:00

141 lines
4.3 KiB
Python

import datetime
import json
import sys
sys.path.append("/stacktach")
from stacktach import datetime_to_decimal as dt
from stacktach import image_type
from stacktach import models
# This file is a command-line report, not a library: leave immediately if it
# is imported instead of being executed directly.
if __name__ != '__main__':
    sys.exit(1)

# Reporting window: it ends `hours` hours before now and is `length` hours
# long.
hours = 0
length = 24

now = datetime.datetime.utcnow()
end = now - datetime.timedelta(hours=hours)
start = end - datetime.timedelta(hours=length)

# Decimal forms of the three datetimes; dstart/dend are what the RawData
# `when` column is filtered against below.
dnow, dstart, dend = map(dt.dt_to_decimal, (now, start, end))

# Occurrence count per exception code.
codes = {}

# Get all the instances that have changed in the last N hours ...
updates = (
    models.RawData.objects
    .filter(event='compute.instance.update', when__gt=dstart, when__lte=dend)
    .values('instance')
    .distinct()
)

expiry = 3600  # one hour, in seconds

# Operation keywords looked for (as substrings) in each event name.
cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']

# Reported-request counters, keyed by (operation, platform) and by tenant.
failures = {}
tenant_issues = {}
# Examine every request made against each recently-updated instance and
# report the ones that hit an error or ran longer than `expiry` seconds.
# Reported requests are tallied into `failures`, `tenant_issues` and `codes`.
for uuid_dict in updates:
    uuid = uuid_dict['instance']

    # All the unique Request ID's for this instance during that timespan.
    reqs = models.RawData.objects.filter(instance=uuid,
                                         when__gt=dstart, when__lte=dend) \
                                 .values('request_id').distinct()

    for req_dict in reqs:
        report = False
        req = req_dict['request_id']
        # Every event recorded for this request, oldest first, leaving out
        # the 'compute.instance.exists' events.
        raws = models.RawData.objects.filter(request_id=req)\
                                     .exclude(event='compute.instance.exists')\
                                     .order_by('when')

        # Timestamps of the first and last event of the request. Dedicated
        # names are used so the module-level `start`/`end` window bounds are
        # not overwritten.
        first_when = None
        last_when = None
        err = None

        operation = None
        platform = 0
        tenant = 0
        dump = False  # set to True to print every event while debugging

        for raw in raws:
            # Compare against None explicitly so a zero timestamp is not
            # mistaken for "no timestamp yet".
            if first_when is None:
                first_when = raw.when
            last_when = raw.when

            if 'error' in raw.routing_key:
                # The last error event seen is the one reported below.
                err = raw
                report = True

            if raw.tenant:
                # Truthiness rather than `tenant > 0`: the stored tenant may
                # be a string, which cannot be ordered against an int
                # reliably.
                if tenant and raw.tenant != tenant:
                    print("Conflicting tenant ID %s %s"
                          % (raw.tenant, tenant))
                tenant = raw.tenant

            # The first keyword from `cmds` found in the event name wins; a
            # later event of the request may replace an earlier match.
            for cmd in cmds:
                if cmd in raw.event:
                    operation = cmd
                    break

            if raw.image_type > 0:
                platform = raw.image_type

            if dump:
                print(" %s %s T:%s %s %s %s %s %s"
                      % (raw.id, raw.routing_key, raw.tenant,
                         raw.service, raw.host, raw.deployment.name,
                         raw.event, dt.dt_from_decimal(raw.when)))
                if raw.event == 'compute.instance.update':
                    print(" State: %s->%s, Task %s->%s"
                          % (raw.old_state, raw.state,
                             raw.old_task, raw.task))

        if first_when is None:
            # No usable events for this request, so nothing to time.
            continue

        # Elapsed time between first and last event; it is divided by 60
        # below to print minutes, i.e. it is measured in seconds.
        diff = last_when - first_when

        # Use the shared `expiry` threshold instead of a hard-coded 3600.
        if diff > expiry:
            report = True

        if report:
            print("------ %s ----------" % (uuid,))
            print(" Req: %s" % (req,))
            print(" Duration: %.2f minutes" % (diff / 60))
            print(" Operation: %s" % (operation,))
            print(" Platform: %s" % (image_type.readable(platform),))
            key = (operation, platform)
            failures[key] = failures.get(key, 0) + 1
            tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1

            if err:
                # The stored JSON is a two-element sequence; only the second
                # element (the message body) is needed here.
                _queue, body = json.loads(err.json)
                payload = body['payload']
                print("Error. EventID: %s, Tenant %s, Service %s, Host %s, "
                      "Deployment %s, Event %s, When %s"
                      % (err.id, err.tenant, err.service, err.host,
                         err.deployment.name, err.event,
                         dt.dt_from_decimal(err.when)))
                exc = payload.get('exception')
                if exc:
                    print(exc)
                    code = exc.get('kwargs', {}).get('code')
                    if code:
                        codes[code] = codes.get(code, 0) + 1
# Final summaries of the counters filled in by the analysis loop above.

# One line per (operation, platform) pair. The platform is shown as the
# comma-joined result of image_type.readable(), or "n/a" when that result is
# empty.
print("-- Failures by operation by platform --")
for (op_name, platform_value), total in failures.items():
    labels = image_type.readable(platform_value)
    where = ", ".join(labels) if labels else "n/a"
    print("%s on %s = %d" % (op_name, where, total))

# One line per tenant that had at least one reported request.
print("-- Errors by Tenant --")
for tenant_id, issue_count in tenant_issues.items():
    print("T %s = %d" % (tenant_id, issue_count))

# One line per exception code collected from error payloads.
print("-- Return code counts --")
for return_code, hits in codes.items():
    print("%s %s" % (return_code, hits))