diff --git a/etc/pip-requires.txt b/etc/pip-requires.txt index c88953c..0024701 100644 --- a/etc/pip-requires.txt +++ b/etc/pip-requires.txt @@ -3,4 +3,5 @@ MySQL-python>=1.2.3 eventlet>=0.9.17 kombu>=2.4.7 librabbitmq>=1.0.0 +prettytable>=0.7.2 argparse diff --git a/reports/error_details.py b/reports/error_details.py index 7c9622b..f1bf891 100644 --- a/reports/error_details.py +++ b/reports/error_details.py @@ -2,11 +2,9 @@ import datetime import json import sys import time +import os -import prettytable - -sys.path.append("/stacktach") - +sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach')) from stacktach import datetime_to_decimal as dt from stacktach import image_type from stacktach import models @@ -28,14 +26,13 @@ if len(sys.argv) == 2: hours = 0 length = 24 -start = datetime.datetime(year=yesterday.year, month=yesterday.month, - day=yesterday.day) +start = datetime.datetime(year=yesterday.year, month=yesterday.month, + day=yesterday.day) end = start + datetime.timedelta(hours=length-1, minutes=59, seconds=59) instance_map = {} # { uuid : [request_id, request_id, ...] } -metadata = {'raw_text': True, 'instances': instance_map} -report = [metadata] # Tell Stacky not to format results. -report.append("Generating report for %s to %s" % (start, end)) +metadata = {'report_format': 'json', 'instances': instance_map} +report = [metadata] # Tell Stacky to format as JSON dstart = dt.dt_to_decimal(start) dend = dt.dt_to_decimal(end) @@ -66,19 +63,19 @@ for uuid_dict in updates: # All the unique Request ID's for this instance during that timespan. 
reqs = models.RawData.objects.filter(instance=uuid, - when__gt=dstart, when__lte=dend) \ + when__gt=dstart, when__lte=dend)\ .values('request_id').distinct() req_list = [] for req_dict in reqs: req = req_dict['request_id'] - raws = list(models.RawData.objects.filter(request_id=req)\ - .exclude(event='compute.instance.exists')\ - .values("id", "when", "routing_key", "old_state", - "state", "tenant", "event", "image_type", - "deployment")\ - .order_by('when')) + raws = list(models.RawData.objects.filter(request_id=req) + .exclude(event='compute.instance.exists') + .values("id", "when", "routing_key", "old_state", + "state", "tenant", "event", "image_type", + "deployment") + .order_by('when')) _start = None err_id = None @@ -115,7 +112,7 @@ for uuid_dict in updates: err_id = _id if _old_state == 'error' and \ - (not _state in ['deleted', 'error']): + (not _state in ['deleted', 'error']): failure_type = None err_id = None @@ -143,7 +140,7 @@ for uuid_dict in updates: _end = _when diff = _end - _start - if diff > 3600 and failure_type == None: + if diff > 3600 and failure_type is None: failure_type = ">60" key = (operation, image_type_num, cell) @@ -161,15 +158,13 @@ for uuid_dict in updates: if not failure_type: successes[key] = successes.get(key, 0) + 1 else: + failed_request = {} req_list.append(req) instance_map[uuid] = req_list - - report.append('') - report.append("------ %s ----------" % uuid) - report.append("Req: %s" % req) - report.append("Duration: %.2f minutes" % (diff / 60)) - report.append("Operation: %s" % operation) - report.append("Platform: %s" % image_type.readable(image_type_num)) + failed_request['req'] = req + failed_request['duration'] = "%.2f minutes" % (diff/60) + failed_request['operation'] = operation + failed_request['platform'] = image_type.readable(image_type_num) failures[key] = failures.get(key, 0) + 1 tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1 @@ -177,146 +172,51 @@ for uuid_dict in updates: err = 
models.RawData.objects.get(id=err_id) queue, body = json.loads(err.json) payload = body['payload'] - - report.append("Event ID: %s" % err.id) - report.append("Tenant: %s" % err.tenant) - report.append("Service: %s" % err.service) - report.append("Host: %s" % err.host) - report.append("Deployment: %s" % err.deployment.name) - report.append("Event: %s" % err.event) - report.append("When: %s" % dt.dt_from_decimal(err.when)) + + # Add error information to failed request report + failed_request['event_id'] = err.id + failed_request['tenant'] = err.tenant + failed_request['service'] = err.service + failed_request['host'] = err.host + failed_request['deployment'] = err.deployment.name + failed_request['event'] = err.event + failed_request['when'] = str(dt.dt_from_decimal(err.when)) + exc = payload.get('exception') if exc: # group the messages ... + failed_request['exception'] = exc + exc_str = str(exc) - report.append("Exception: %s" % exc_str) error_messages[exc_str] = \ - error_messages.get(exc_str, 0) + 1 - + error_messages.get(exc_str, 0) + 1 + # extract the code, if any ... 
code = exc.get('kwargs', {}).get('code') if code: codes[code] = codes.get(code, 0) + 1 failure_type = code - report.append("Failure Type: %s" % failure_type) - - report.append('') - report.append("Details:") + failed_request['failure_type'] = failure_type raws = models.RawData.objects.filter(request_id=req)\ .exclude(event='compute.instance.exists')\ .order_by('when') + failed_request['details'] = [] for raw in raws: - report.append("H: %s E:%s, S:(%s->%s) T:(%s->%s)" % - (raw.host, raw.event, - raw.old_state, raw.state, raw.old_task, - raw.task)) - report.append('---------------------------------------') + failure_detail = {} + failure_detail['host'] = raw.host + failure_detail['event'] = raw.event + failure_detail['old_state'] = raw.old_state + failure_detail['state'] = raw.state + failure_detail['old_task'] = raw.old_task + failure_detail['task'] = raw.task + failed_request['details'].append(failure_detail) + + report.append(failed_request) + cause_key = (key, failure_type) causes[cause_key] = causes.get(cause_key, 0) + 1 - -def dump_breakdown(totals, label): - p = prettytable.PrettyTable(["Category", "Count"]) - for k, v in totals.iteritems(): - p.add_row([k, v]) - report.append(label) - p.sortby = 'Count' - report.append(p.get_string()) - - -def dump_summary(info, label): - report.append("-- %s by operation by cell by platform --" % (label,)) - p = prettytable.PrettyTable(["Operation", "Cell", "Platform", "Count", - "Min", "Max", "Avg"]) - for c in ["Count", "Min", "Max", "Avg"]: - p.align[c] = 'r' - - total = 0 - op_totals = {} - cell_totals = {} - platform_totals = {} - for key, count in info.iteritems(): - operation, platform, cell = key - readable = image_type.readable(platform) - text = "n/a" - if readable: - text = ", ".join(readable) - - _min, _max, _count, _total = durations[key] - _avg = float(_total) / float(_count) - _fmin = dt.sec_to_str(_min) - _fmax = dt.sec_to_str(_max) - _favg = dt.sec_to_str(_avg * 100.0) - - op_totals[operation] = 
op_totals.get(operation, 0) + count - cell_totals[cell] = cell_totals.get(cell, 0) + count - platform_totals[text] = platform_totals.get(text, 0) + count - - p.add_row([operation, cell, text, count, _fmin, _fmax, _favg]) - total += count - p.sortby = 'Count' - report.append(p.get_string()) - - dump_breakdown(op_totals, "Total %s by Operation" % label) - dump_breakdown(cell_totals, "Total %s by Cell" % label) - dump_breakdown(platform_totals, "Total %s by Platform" % label) - - report.append('') - return total - - -good = dump_summary(successes, "Success") -bad = dump_summary(failures, "Failures") -report.append(""" -SUMMARY - -===================================================== -Total Success: %d Total Failure: %d - -""" % (good, bad)) - -p = prettytable.PrettyTable(["Tenant", "Count"]) -for tenant, count in tenant_issues.iteritems(): - p.add_row([tenant, count]) -p.sortby = 'Count' -report.append(""" --- Errors by Tenant -- -%s""" % p.get_string()) - -p = prettytable.PrettyTable(["Return Code", "Count"]) -for k, v in codes.iteritems(): - p.add_row([k, v]) -p.sortby = 'Count' -report.append(""" --- Return code counts -- -%s""" % p.get_string()) - -p = prettytable.PrettyTable(["Cause", "Operation", "Cell", "Platform", "Count"]) -for cause_key, count in causes.iteritems(): - key, cause = cause_key - operation, platform, cell = key - readable = image_type.readable(platform) - text = "n/a" - if readable: - text = ", ".join(readable) - p.add_row([cause, operation, cell, text, count]) -p.sortby = 'Count' -report.append(""" --- Cause breakdown -- -%s""" % p.get_string()) - -p = prettytable.PrettyTable(["Count", "Message"]) -for k, v in error_messages.iteritems(): - p.add_row([v, k[:80]]) -p.sortby = 'Count' -report.append(""" --- Error Message Counts -- -%s""" % p.get_string()) - -for r in report[1:]: - print r - values = {'json': json.dumps(report), 'created': dt.dt_to_decimal(datetime.datetime.utcnow()), 'period_start': start, diff --git a/reports/nova_usage_audit.py 
b/reports/nova_usage_audit.py index e01ee5f..a87587e 100644 --- a/reports/nova_usage_audit.py +++ b/reports/nova_usage_audit.py @@ -22,8 +22,9 @@ import argparse import datetime import json import sys +import os -sys.path.append("/stacktach") +sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach')) from stacktach import datetime_to_decimal as dt from stacktach import models diff --git a/reports/pretty.py b/reports/pretty.py index bffbc75..7867592 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -3,10 +3,10 @@ import datetime import json import sys import time - +import os import prettytable -sys.path.append("/stacktach") +sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach')) from stacktach import datetime_to_decimal as dt from stacktach import image_type diff --git a/settings.py b/settings.py index 7d0e86a..bdf7af3 100644 --- a/settings.py +++ b/settings.py @@ -10,7 +10,7 @@ try: db_username = STACKTACH_DB_USERNAME db_password = STACKTACH_DB_PASSWORD db_port = STACKTACH_DB_PORT - install_dir = STACKTACH_INSTALL_DIR + install_dir = os.path.expanduser(STACKTACH_INSTALL_DIR) except ImportError: db_engine = os.environ.get('STACKTACH_DB_ENGINE', 'django.db.backends.mysql') @@ -123,14 +123,12 @@ MIDDLEWARE_CLASSES = ( ) ROOT_URLCONF = 'stacktach.urls' - TEMPLATE_DIRS = ( # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". # Always use forward slashes, even on Windows. # Don't forget to use absolute paths, not relative paths. 
install_dir + "templates" ) - INSTALLED_APPS = ( 'django.contrib.auth', 'django.contrib.contenttypes', diff --git a/stacktach/stacky_server.py b/stacktach/stacky_server.py index 259a5bf..010898c 100644 --- a/stacktach/stacky_server.py +++ b/stacktach/stacky_server.py @@ -29,7 +29,7 @@ def get_host_names(): def routing_key_type(key): if key.endswith('error'): return 'E' - return ' ' + return ' ' def get_deployments(): @@ -71,14 +71,13 @@ def sec_to_time(diff): return "%dd %02d:%02d:%02d%s" % (days, hours, minutes, seconds, usec) -def rsp(data, status=200): - return HttpResponse(json.dumps(data), content_type="application/json", - status=status) +def rsp(data, content_type="application/json", status=200): + return HttpResponse(data, content_type=content_type, status=status) def error_response(status, type, message): results = [["Error", "Message"], [type, message]] - return rsp(results, status) + return rsp(json.dumps(results), status=status) def do_deployments(request): @@ -86,7 +85,7 @@ def do_deployments(request): results = [["#", "Name"]] for deployment in deployments: results.append([deployment.id, deployment.name]) - return rsp(results) + return rsp(json.dumps(results)) def do_events(request): @@ -94,7 +93,7 @@ def do_events(request): results = [["Event Name"]] for event in events: results.append([event['event']]) - return rsp(results) + return rsp(json.dumps(results)) def do_hosts(request): @@ -102,7 +101,7 @@ def do_hosts(request): results = [["Host Name"]] for host in hosts: results.append([host['host']]) - return rsp(results) + return rsp(json.dumps(results)) def do_uuid(request): @@ -120,7 +119,7 @@ def do_uuid(request): results.append([e.id, routing_key_type(e.routing_key), str(when), e.deployment.name, e.event, e.host, e.state, e.old_state, e.old_task]) - return rsp(results) + return rsp(json.dumps(results)) def do_timings_uuid(request): @@ -128,8 +127,8 @@ def do_timings_uuid(request): if not utils.is_uuid_like(uuid): msg = "%s is not uuid-like" % uuid return
error_response(400, 'Bad Request', msg) - - return rsp(get_timings_for_uuid(uuid)) + results = get_timings_for_uuid(uuid) + return rsp(json.dumps(results)) def do_timings(request): @@ -148,7 +147,7 @@ def do_timings(request): for t in timings: results.append([t.lifecycle.instance, sec_to_time(t.diff)]) - return rsp(results) + return rsp(json.dumps(results)) def do_summary(request): @@ -182,7 +181,7 @@ def do_summary(request): results.append([name, int(num), sec_to_time(_min), sec_to_time(_max), sec_to_time(int(total / num))]) - return rsp(results) + return rsp(json.dumps(results)) def do_request(request): @@ -200,7 +199,7 @@ def do_request(request): results.append([e.id, routing_key_type(e.routing_key), str(when), e.deployment.name, e.event, e.host, e.state, e.old_state, e.old_task]) - return rsp(results) + return rsp(json.dumps(results)) def do_show(request, event_id): @@ -231,7 +230,7 @@ def do_show(request, event_id): final.append(json.dumps(j, indent=2)) final.append(event.instance) - return rsp(final) + return rsp(json.dumps(final)) def do_watch(request, deployment_id): @@ -287,8 +286,9 @@ def do_watch(request, deployment_id): deployment_map[raw.deployment.id].name, raw.event, uuid]) + results_json = json.dumps([c, results, str(dec_now)]) - return rsp([c, results, str(dec_now)]) + return rsp(results_json) def do_kpi(request, tenant_id=None): @@ -312,7 +312,7 @@ def do_kpi(request, tenant_id=None): if tenant_id is None or (tenant_id == end_event.tenant): results.append([event, sec_to_time(track.duration), uuid, end_event.deployment.name]) - return rsp(results) + return rsp(json.dumps(results)) def do_list_usage_launches(request): @@ -338,7 +338,7 @@ def do_list_usage_launches(request): launched = str(dt.dt_from_decimal(launch.launched_at)) results.append([launch.instance, launched, launch.instance_type_id]) - return rsp(results) + return rsp(json.dumps(results)) def do_list_usage_deletes(request): @@ -367,7 +367,7 @@ def do_list_usage_deletes(request): deleted 
= str(dt.dt_from_decimal(delete.deleted_at)) results.append([delete.instance, launched, deleted]) - return rsp(results) + return rsp(json.dumps(results)) def do_list_usage_exists(request): @@ -399,7 +399,7 @@ def do_list_usage_exists(request): exist.instance_type_id, exist.message_id, exist.status]) - return rsp(results) + return rsp(json.dumps(results), content_type="application/json") def do_jsonreports(request): @@ -419,7 +419,7 @@ def do_jsonreports(request): float(report.created), report.name, report.version]) - return rsp(results) + return rsp(json.dumps(results)) def do_jsonreport(request, report_id):