Merge pull request #85 from TMaddox/error_details_to_json
Error details to json
This commit is contained in:
commit
5edb395e1c
@ -3,4 +3,5 @@ MySQL-python>=1.2.3
|
||||
eventlet>=0.9.17
|
||||
kombu>=2.4.7
|
||||
librabbitmq>=1.0.0
|
||||
prettytable>=0.7.2
|
||||
argparse
|
||||
|
@ -2,11 +2,9 @@ import datetime
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import os
|
||||
|
||||
import prettytable
|
||||
|
||||
sys.path.append("/stacktach")
|
||||
|
||||
sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach'))
|
||||
from stacktach import datetime_to_decimal as dt
|
||||
from stacktach import image_type
|
||||
from stacktach import models
|
||||
@ -28,14 +26,13 @@ if len(sys.argv) == 2:
|
||||
hours = 0
|
||||
length = 24
|
||||
|
||||
start = datetime.datetime(year=yesterday.year, month=yesterday.month,
|
||||
day=yesterday.day)
|
||||
start = datetime.datetime(year=yesterday.year, month=yesterday.month,
|
||||
day=yesterday.day)
|
||||
end = start + datetime.timedelta(hours=length-1, minutes=59, seconds=59)
|
||||
|
||||
instance_map = {} # { uuid : [request_id, request_id, ...] }
|
||||
metadata = {'raw_text': True, 'instances': instance_map}
|
||||
report = [metadata] # Tell Stacky not to format results.
|
||||
report.append("Generating report for %s to %s" % (start, end))
|
||||
metadata = {'report_format': 'json', 'instances': instance_map}
|
||||
report = [metadata] # Tell Stacky to format as JSON
|
||||
|
||||
dstart = dt.dt_to_decimal(start)
|
||||
dend = dt.dt_to_decimal(end)
|
||||
@ -66,19 +63,19 @@ for uuid_dict in updates:
|
||||
|
||||
# All the unique Request ID's for this instance during that timespan.
|
||||
reqs = models.RawData.objects.filter(instance=uuid,
|
||||
when__gt=dstart, when__lte=dend) \
|
||||
when__gt=dstart, when__lte=dend)\
|
||||
.values('request_id').distinct()
|
||||
|
||||
req_list = []
|
||||
for req_dict in reqs:
|
||||
req = req_dict['request_id']
|
||||
|
||||
raws = list(models.RawData.objects.filter(request_id=req)\
|
||||
.exclude(event='compute.instance.exists')\
|
||||
.values("id", "when", "routing_key", "old_state",
|
||||
"state", "tenant", "event", "image_type",
|
||||
"deployment")\
|
||||
.order_by('when'))
|
||||
raws = list(models.RawData.objects.filter(request_id=req)
|
||||
.exclude(event='compute.instance.exists')
|
||||
.values("id", "when", "routing_key", "old_state",
|
||||
"state", "tenant", "event", "image_type",
|
||||
"deployment")
|
||||
.order_by('when'))
|
||||
|
||||
_start = None
|
||||
err_id = None
|
||||
@ -115,7 +112,7 @@ for uuid_dict in updates:
|
||||
err_id = _id
|
||||
|
||||
if _old_state == 'error' and \
|
||||
(not _state in ['deleted', 'error']):
|
||||
(not _state in ['deleted', 'error']):
|
||||
failure_type = None
|
||||
err_id = None
|
||||
|
||||
@ -143,7 +140,7 @@ for uuid_dict in updates:
|
||||
_end = _when
|
||||
diff = _end - _start
|
||||
|
||||
if diff > 3600 and failure_type == None:
|
||||
if diff > 3600 and failure_type is None:
|
||||
failure_type = ">60"
|
||||
|
||||
key = (operation, image_type_num, cell)
|
||||
@ -161,15 +158,13 @@ for uuid_dict in updates:
|
||||
if not failure_type:
|
||||
successes[key] = successes.get(key, 0) + 1
|
||||
else:
|
||||
failed_request = {}
|
||||
req_list.append(req)
|
||||
instance_map[uuid] = req_list
|
||||
|
||||
report.append('')
|
||||
report.append("------ %s ----------" % uuid)
|
||||
report.append("Req: %s" % req)
|
||||
report.append("Duration: %.2f minutes" % (diff / 60))
|
||||
report.append("Operation: %s" % operation)
|
||||
report.append("Platform: %s" % image_type.readable(image_type_num))
|
||||
failed_request['req'] = req
|
||||
failed_request['duration'] = "%.2f minutes" % (diff/60)
|
||||
failed_request['operation'] = operation
|
||||
failed_request['platform'] = image_type.readable(image_type_num)
|
||||
failures[key] = failures.get(key, 0) + 1
|
||||
tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
|
||||
|
||||
@ -177,146 +172,51 @@ for uuid_dict in updates:
|
||||
err = models.RawData.objects.get(id=err_id)
|
||||
queue, body = json.loads(err.json)
|
||||
payload = body['payload']
|
||||
|
||||
report.append("Event ID: %s" % err.id)
|
||||
report.append("Tenant: %s" % err.tenant)
|
||||
report.append("Service: %s" % err.service)
|
||||
report.append("Host: %s" % err.host)
|
||||
report.append("Deployment: %s" % err.deployment.name)
|
||||
report.append("Event: %s" % err.event)
|
||||
report.append("When: %s" % dt.dt_from_decimal(err.when))
|
||||
|
||||
# Add error information to failed request report
|
||||
failed_request['event_id'] = err.id
|
||||
failed_request['tenant'] = err.tenant
|
||||
failed_request['service'] = err.service
|
||||
failed_request['host'] = err.host
|
||||
failed_request['deployment'] = err.deployment.name
|
||||
failed_request['event'] = err.event
|
||||
failed_request['when'] = str(dt.dt_from_decimal(err.when))
|
||||
|
||||
exc = payload.get('exception')
|
||||
if exc:
|
||||
# group the messages ...
|
||||
failed_request['exception'] = exc
|
||||
|
||||
exc_str = str(exc)
|
||||
report.append("Exception: %s" % exc_str)
|
||||
error_messages[exc_str] = \
|
||||
error_messages.get(exc_str, 0) + 1
|
||||
|
||||
error_messages.get(exc_str, 0) + 1
|
||||
|
||||
# extract the code, if any ...
|
||||
code = exc.get('kwargs', {}).get('code')
|
||||
if code:
|
||||
codes[code] = codes.get(code, 0) + 1
|
||||
failure_type = code
|
||||
report.append("Failure Type: %s" % failure_type)
|
||||
|
||||
report.append('')
|
||||
report.append("Details:")
|
||||
failed_request['failure_type'] = failure_type
|
||||
raws = models.RawData.objects.filter(request_id=req)\
|
||||
.exclude(event='compute.instance.exists')\
|
||||
.order_by('when')
|
||||
failed_request['details'] = []
|
||||
|
||||
for raw in raws:
|
||||
report.append("H: %s E:%s, S:(%s->%s) T:(%s->%s)" %
|
||||
(raw.host, raw.event,
|
||||
raw.old_state, raw.state, raw.old_task,
|
||||
raw.task))
|
||||
report.append('---------------------------------------')
|
||||
failure_detail = {}
|
||||
failure_detail['host'] = raw.host
|
||||
failure_detail['event'] = raw.event
|
||||
failure_detail['old_state'] = raw.old_state
|
||||
failure_detail['state'] = raw.state
|
||||
failure_detail['old_task'] = raw.old_task
|
||||
failure_detail['task'] = raw.task
|
||||
failed_request['details'].append(failure_detail)
|
||||
|
||||
report.append(failed_request)
|
||||
|
||||
cause_key = (key, failure_type)
|
||||
causes[cause_key] = causes.get(cause_key, 0) + 1
|
||||
|
||||
|
||||
def dump_breakdown(totals, label):
    """Append a labelled "Category"/"Count" table to the report.

    ``totals`` is a mapping of category -> count. ``label`` is appended to
    the module-level ``report`` list first, followed by the rendered table
    sorted by its "Count" column.
    """
    table = prettytable.PrettyTable(["Category", "Count"])
    for category, count in totals.iteritems():
        table.add_row([category, count])
    table.sortby = 'Count'

    report.append(label)
    report.append(table.get_string())
|
||||
|
||||
|
||||
def dump_summary(info, label):
    """Append a summary table plus per-dimension breakdowns to the report.

    ``info`` maps ``(operation, platform, cell)`` keys to a count. Timing
    figures for each key are read from the module-level ``durations``
    mapping, unpacked as ``(min, max, count, total)``. After the main table,
    three breakdown tables (by operation, by cell, by platform) are appended
    through ``dump_breakdown``, followed by an empty line.

    Returns the sum of all counts in ``info``.
    """
    report.append("-- %s by operation by cell by platform --" % (label,))

    columns = ["Operation", "Cell", "Platform", "Count", "Min", "Max", "Avg"]
    table = prettytable.PrettyTable(columns)
    # Right-align the numeric columns: Count, Min, Max, Avg.
    for column in columns[3:]:
        table.align[column] = 'r'

    grand_total = 0
    by_operation = {}
    by_cell = {}
    by_platform = {}
    for key, count in info.iteritems():
        operation, platform, cell = key
        readable = image_type.readable(platform)
        platform_text = ", ".join(readable) if readable else "n/a"

        shortest, longest, samples, elapsed = durations[key]
        average = float(elapsed) / float(samples)
        shortest_text = dt.sec_to_str(shortest)
        longest_text = dt.sec_to_str(longest)
        # NOTE(review): min/max are passed through unscaled while the average
        # is multiplied by 100 -- looks inconsistent; confirm against
        # dt.sec_to_str before relying on the "Avg" column.
        average_text = dt.sec_to_str(average * 100.0)

        table.add_row([operation, cell, platform_text, count,
                       shortest_text, longest_text, average_text])

        by_operation[operation] = by_operation.get(operation, 0) + count
        by_cell[cell] = by_cell.get(cell, 0) + count
        by_platform[platform_text] = by_platform.get(platform_text, 0) + count
        grand_total += count

    table.sortby = 'Count'
    report.append(table.get_string())

    breakdowns = (
        (by_operation, "Total %s by Operation" % label),
        (by_cell, "Total %s by Cell" % label),
        (by_platform, "Total %s by Platform" % label),
    )
    for totals, title in breakdowns:
        dump_breakdown(totals, title)

    report.append('')
    return grand_total
|
||||
|
||||
|
||||
good = dump_summary(successes, "Success")
|
||||
bad = dump_summary(failures, "Failures")
|
||||
report.append("""
|
||||
SUMMARY
|
||||
|
||||
=====================================================
|
||||
Total Success: %d Total Failure: %d
|
||||
|
||||
""" % (good, bad))
|
||||
|
||||
p = prettytable.PrettyTable(["Tenant", "Count"])
|
||||
for tenant, count in tenant_issues.iteritems():
|
||||
p.add_row([tenant, count])
|
||||
p.sortby = 'Count'
|
||||
report.append("""
|
||||
-- Errors by Tenant --
|
||||
%s""" % p.get_string())
|
||||
|
||||
p = prettytable.PrettyTable(["Return Code", "Count"])
|
||||
for k, v in codes.iteritems():
|
||||
p.add_row([k, v])
|
||||
p.sortby = 'Count'
|
||||
report.append("""
|
||||
-- Return code counts --
|
||||
%s""" % p.get_string())
|
||||
|
||||
p = prettytable.PrettyTable(["Cause", "Operation", "Cell", "Platform", "Count"])
|
||||
for cause_key, count in causes.iteritems():
|
||||
key, cause = cause_key
|
||||
operation, platform, cell = key
|
||||
readable = image_type.readable(platform)
|
||||
text = "n/a"
|
||||
if readable:
|
||||
text = ", ".join(readable)
|
||||
p.add_row([cause, operation, cell, text, count])
|
||||
p.sortby = 'Count'
|
||||
report.append("""
|
||||
-- Cause breakdown --
|
||||
%s""" % p.get_string())
|
||||
|
||||
p = prettytable.PrettyTable(["Count", "Message"])
|
||||
for k, v in error_messages.iteritems():
|
||||
p.add_row([v, k[:80]])
|
||||
p.sortby = 'Count'
|
||||
report.append("""
|
||||
-- Error Message Counts --
|
||||
%s""" % p.get_string())
|
||||
|
||||
for r in report[1:]:
|
||||
print r
|
||||
|
||||
values = {'json': json.dumps(report),
|
||||
'created': dt.dt_to_decimal(datetime.datetime.utcnow()),
|
||||
'period_start': start,
|
||||
|
@ -22,8 +22,9 @@ import argparse
|
||||
import datetime
|
||||
import json
|
||||
import sys
|
||||
import os
|
||||
|
||||
sys.path.append("/stacktach")
|
||||
sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach'))
|
||||
|
||||
from stacktach import datetime_to_decimal as dt
|
||||
from stacktach import models
|
||||
|
@ -3,10 +3,10 @@ import datetime
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
|
||||
import os
|
||||
import prettytable
|
||||
|
||||
sys.path.append("/stacktach")
|
||||
sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach'))
|
||||
|
||||
from stacktach import datetime_to_decimal as dt
|
||||
from stacktach import image_type
|
||||
|
@ -10,7 +10,7 @@ try:
|
||||
db_username = STACKTACH_DB_USERNAME
|
||||
db_password = STACKTACH_DB_PASSWORD
|
||||
db_port = STACKTACH_DB_PORT
|
||||
install_dir = STACKTACH_INSTALL_DIR
|
||||
install_dir = os.path.expanduser(STACKTACH_INSTALL_DIR)
|
||||
except ImportError:
|
||||
db_engine = os.environ.get('STACKTACH_DB_ENGINE',
|
||||
'django.db.backends.mysql')
|
||||
@ -123,14 +123,12 @@ MIDDLEWARE_CLASSES = (
|
||||
)
|
||||
|
||||
ROOT_URLCONF = 'stacktach.urls'
|
||||
|
||||
TEMPLATE_DIRS = (
|
||||
# Put strings here, like "/home/html/django_templates" or "C:/www/django/templates".
|
||||
# Always use forward slashes, even on Windows.
|
||||
# Don't forget to use absolute paths, not relative paths.
|
||||
install_dir + "templates"
|
||||
)
|
||||
|
||||
INSTALLED_APPS = (
|
||||
'django.contrib.auth',
|
||||
'django.contrib.contenttypes',
|
||||
|
@ -29,7 +29,7 @@ def get_host_names():
|
||||
def routing_key_type(key):
    """Return a one-character marker for a routing key.

    Returns ``'E'`` when ``key`` ends with ``'error'`` and a single space
    otherwise.
    """
    if key.endswith('error'):
        return 'E'
    return ' '
|
||||
|
||||
|
||||
def get_deployments():
|
||||
@ -71,14 +71,13 @@ def sec_to_time(diff):
|
||||
return "%dd %02d:%02d:%02d%s" % (days, hours, minutes, seconds, usec)
|
||||
|
||||
|
||||
def rsp(data, content_type="application/json", status=200):
    """Wrap ``data`` in an ``HttpResponse``.

    ``data`` is passed to ``HttpResponse`` unchanged, so callers are
    responsible for serializing it (e.g. with ``json.dumps``) beforehand.

    ``content_type`` comes before ``status`` in the signature, so a status
    code must be supplied by keyword (``status=...``), never as the second
    positional argument.
    """
    return HttpResponse(data, content_type=content_type, status=status)
|
||||
|
||||
|
||||
def error_response(status, type, message):
    """Build a JSON error response with the given HTTP ``status``.

    The body is a two-row table, ``[["Error", "Message"], [type, message]]``,
    serialized with ``json.dumps``.
    """
    results = [["Error", "Message"], [type, message]]
    # rsp()'s second positional parameter is content_type, not status.
    # Passing the status positionally would send the error with the default
    # 200 status and a numeric content type, so it must go by keyword.
    return rsp(json.dumps(results), status=status)
|
||||
|
||||
|
||||
def do_deployments(request):
|
||||
@ -86,7 +85,7 @@ def do_deployments(request):
|
||||
results = [["#", "Name"]]
|
||||
for deployment in deployments:
|
||||
results.append([deployment.id, deployment.name])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_events(request):
|
||||
@ -94,7 +93,7 @@ def do_events(request):
|
||||
results = [["Event Name"]]
|
||||
for event in events:
|
||||
results.append([event['event']])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_hosts(request):
|
||||
@ -102,7 +101,7 @@ def do_hosts(request):
|
||||
results = [["Host Name"]]
|
||||
for host in hosts:
|
||||
results.append([host['host']])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_uuid(request):
|
||||
@ -120,7 +119,7 @@ def do_uuid(request):
|
||||
results.append([e.id, routing_key_type(e.routing_key), str(when),
|
||||
e.deployment.name, e.event, e.host, e.state,
|
||||
e.old_state, e.old_task])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_timings_uuid(request):
|
||||
@ -128,8 +127,8 @@ def do_timings_uuid(request):
|
||||
if not utils.is_uuid_like(uuid):
|
||||
msg = "%s is not uuid-like" % uuid
|
||||
return error_response(400, 'Bad Request', msg)
|
||||
|
||||
return rsp(get_timings_for_uuid(uuid))
|
||||
results = get_timings_for_uuid(uuid)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_timings(request):
|
||||
@ -148,7 +147,7 @@ def do_timings(request):
|
||||
|
||||
for t in timings:
|
||||
results.append([t.lifecycle.instance, sec_to_time(t.diff)])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_summary(request):
|
||||
@ -182,7 +181,7 @@ def do_summary(request):
|
||||
|
||||
results.append([name, int(num), sec_to_time(_min),
|
||||
sec_to_time(_max), sec_to_time(int(total / num))])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_request(request):
|
||||
@ -200,7 +199,7 @@ def do_request(request):
|
||||
results.append([e.id, routing_key_type(e.routing_key), str(when),
|
||||
e.deployment.name, e.event, e.host, e.state,
|
||||
e.old_state, e.old_task])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_show(request, event_id):
|
||||
@ -231,7 +230,7 @@ def do_show(request, event_id):
|
||||
final.append(json.dumps(j, indent=2))
|
||||
final.append(event.instance)
|
||||
|
||||
return rsp(final)
|
||||
return rsp(json.dumps(final))
|
||||
|
||||
|
||||
def do_watch(request, deployment_id):
|
||||
@ -287,8 +286,9 @@ def do_watch(request, deployment_id):
|
||||
deployment_map[raw.deployment.id].name,
|
||||
raw.event,
|
||||
uuid])
|
||||
results_json = json.dumps([c, results, str(dec_now)])
|
||||
|
||||
return rsp([c, results, str(dec_now)])
|
||||
return rsp(results_json)
|
||||
|
||||
|
||||
def do_kpi(request, tenant_id=None):
|
||||
@ -312,7 +312,7 @@ def do_kpi(request, tenant_id=None):
|
||||
if tenant_id is None or (tenant_id == end_event.tenant):
|
||||
results.append([event, sec_to_time(track.duration),
|
||||
uuid, end_event.deployment.name])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_list_usage_launches(request):
|
||||
@ -338,7 +338,7 @@ def do_list_usage_launches(request):
|
||||
launched = str(dt.dt_from_decimal(launch.launched_at))
|
||||
results.append([launch.instance, launched, launch.instance_type_id])
|
||||
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_list_usage_deletes(request):
|
||||
@ -367,7 +367,7 @@ def do_list_usage_deletes(request):
|
||||
deleted = str(dt.dt_from_decimal(delete.deleted_at))
|
||||
results.append([delete.instance, launched, deleted])
|
||||
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_list_usage_exists(request):
|
||||
@ -399,7 +399,7 @@ def do_list_usage_exists(request):
|
||||
exist.instance_type_id, exist.message_id,
|
||||
exist.status])
|
||||
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results), content_type="application/json")
|
||||
|
||||
|
||||
def do_jsonreports(request):
|
||||
@ -419,7 +419,7 @@ def do_jsonreports(request):
|
||||
float(report.created),
|
||||
report.name,
|
||||
report.version])
|
||||
return rsp(results)
|
||||
return rsp(json.dumps(results))
|
||||
|
||||
|
||||
def do_jsonreport(request, report_id):
|
||||
|
Loading…
x
Reference in New Issue
Block a user