From ef92c33f74448e4b5ad81b9ce12152061c448938 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Fri, 15 Feb 2013 15:05:43 -0400 Subject: [PATCH 01/13] started on refactoring for better command line support --- reports/pretty.py | 289 +++++++++++++++++++++++++------------------- settings.py | 2 +- stacktach/models.py | 11 ++ 3 files changed, 180 insertions(+), 122 deletions(-) diff --git a/reports/pretty.py b/reports/pretty.py index d177867..1f19ae9 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -1,3 +1,4 @@ +import argparse import datetime import json import sys @@ -6,161 +7,207 @@ import time import prettytable sys.path.append("/stacktach") +sys.path.append(".") from stacktach import datetime_to_decimal as dt from stacktach import image_type from stacktach import models -if __name__ != '__main__': - sys.exit(1) +def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, store=False): + if not yesterday: + yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1) -yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1) -if len(sys.argv) == 2: - try: - t = time.strptime(sys.argv[1], "%Y-%m-%d") - yesterday = datetime.datetime(*t[:6]) - except Exception, e: - print e - print "Usage: python requests.py YYYY-MM-DD (the end date)" - sys.exit(1) + start = datetime.datetime(year=yesterday.year, month=yesterday.month, + day=yesterday.day, hour=start_hour) + end = start + datetime.timedelta(hours=hours-1, minutes=59, seconds=59) -percentile = 90 -hours = 24 + dstart = dt.dt_to_decimal(start) + dend = dt.dt_to_decimal(end) -start = datetime.datetime(year=yesterday.year, month=yesterday.month, - day=yesterday.day) -end = start + datetime.timedelta(hours=hours-1, minutes=59, seconds=59) + codes = {} -print "Generating report for %s to %s" % (start, end) + # Get all the instances that have changed in the last N hours ... + updates = models.RawData.objects.filter(event='compute.instance.update', + when__gt=dstart, when__lte=dend)\ + .values('instance').distinct() -dstart = dt.dt_to_decimal(start) -dend = dt.dt_to_decimal(end) + expiry = 60 * 60 # 1 hour + cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot'] -codes = {} + failures = {} + durations = {} + attempts = {} -# Get all the instances that have changed in the last N hours ... -updates = models.RawData.objects.filter(event='compute.instance.update', - when__gt=dstart, when__lte=dend)\ - .values('instance').distinct() + for uuid_dict in updates: + uuid = uuid_dict['instance'] -expiry = 60 * 60 # 1 hour -cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot'] - -failures = {} -durations = {} -attempts = {} - -for uuid_dict in updates: - uuid = uuid_dict['instance'] - - # All the unique Request ID's for this instance during that timespan. - reqs = models.RawData.objects.filter(instance=uuid, - when__gt=dstart, when__lte=dend) \ - .values('request_id').distinct() + # All the unique Request ID's for this instance during that timespan. + reqs = models.RawData.objects.filter(instance=uuid, + when__gt=dstart, when__lte=dend) \ + .values('request_id').distinct() - for req_dict in reqs: - report = False - req = req_dict['request_id'] - raws = models.RawData.objects.filter(request_id=req)\ - .exclude(event='compute.instance.exists')\ - .order_by('when') + for req_dict in reqs: + report = False + req = req_dict['request_id'] + raws = models.RawData.objects.filter(request_id=req)\ + .exclude(event='compute.instance.exists')\ + .order_by('when') - start = None - err = None + start = None + err = None - operation = "aux" - image_type_num = 0 + operation = "aux" + image_type_num = 0 + + for raw in raws: + if not start: + start = raw.when + if 'error' in raw.routing_key: + err = raw + report = True + + for cmd in cmds: + if cmd in raw.event: + operation = cmd + break + + if raw.image_type: + image_type_num |= raw.image_type + + image = "?" + if image_type.isset(image_type_num, image_type.BASE_IMAGE): + image = "base" + if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE): + image = "snap" - for raw in raws: if not start: - start = raw.when - if 'error' in raw.routing_key: - err = raw + continue + + end = raw.when + diff = end - start + + if diff > 3600: report = True - for cmd in cmds: - if cmd in raw.event: - operation = cmd - break + key = (operation, image) - if raw.image_type: - image_type_num |= raw.image_type + # Track durations for all attempts, good and bad ... + _durations = durations.get(key, []) + _durations.append(diff) + durations[key] = _durations - image = "?" - if image_type.isset(image_type_num, image_type.BASE_IMAGE): - image = "base" - if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE): - image = "snap" + attempts[key] = attempts.get(key, 0) + 1 - if not start: - continue + if report: + failures[key] = failures.get(key, 0) + 1 - end = raw.when - diff = end - start + # Summarize the results ... + report = [] + pct = (float(100 - percentile) / 2.0) / 100.0 + details = {'percentile': percentile, 'pct': pct, 'hours': hours, + 'start': start, 'end': end} + report.append(details) - if diff > 3600: - report = True + cols = ["Operation", "Image", "Min*", "Max*", "Avg*", + "Requests", "# Fail", "Fail %"] + report.append(cols) - key = (operation, image) + total = 0 + failure_total = 0 + for key, count in attempts.iteritems(): + total += count + operation, image = key - # Track durations for all attempts, good and bad ... - _durations = durations.get(key, []) - _durations.append(diff) - durations[key] = _durations + failure_count = failures.get(key, 0) + failure_total += failure_count + failure_percentage = float(failure_count) / float(count) - attempts[key] = attempts.get(key, 0) + 1 + # N-th % of durations ... + _values = durations[key] + _values.sort() + _outliers = int(float(len(_values)) * pct) + if _outliers > 0: + before = len(_values) + _values = _values[_outliers:-_outliers] + _min = 99999999 + _max = 0 + _total = 0.0 + for value in _values: + _min = min(_min, value) + _max = max(_max, value) + _total += float(value) + _avg = float(_total) / float(len(_values)) + _fmin = dt.sec_to_str(_min) + _fmax = dt.sec_to_str(_max) + _favg = dt.sec_to_str(_avg) - if report: - failures[key] = failures.get(key, 0) + 1 + report.add_row([operation, image, _fmin, _fmax, _favg, count, + failure_count, failure_percentage]) -# Print the results ... -cols = ["Operation", "Image", "Min*", "Max*", "Avg*", - "Requests", "# Fail", "Fail %"] -p = prettytable.PrettyTable(cols) -for c in cols[2:]: - p.align[c] = 'r' -p.sortby = cols[0] + details['total'] = total + details['failures'] = failures + details['failure_rate'] = (float(failure_total)/float(total)) * 100.0 + return report -pct = (float(100 - percentile) / 2.0) / 100.0 -print "* Using %d-th percentile for results (+/-%.1f%% cut)" % \ - (percentile, pct * 100.0) -total = 0 -failure_total = 0 -for key, count in attempts.iteritems(): - total += count - operation, image = key - failure_count = failures.get(key, 0) - failure_total += failure_count - failure_percentage = float(failure_count) / float(count) - _failure_percentage = "%.1f%%" % (failure_percentage * 100.0) +def valid_date(date): + try: + t = time.strptime(date, "%Y-%m-%d") + return datetime.datetime(*t[:6]) + except Exception, e: + raise argparse.ArgumentTypeError("'%s' is not in YYYY-MM-DD format." % date) - # N-th % of durations ... - _values = durations[key] - _values.sort() - _outliers = int(float(len(_values)) * pct) - if _outliers > 0: - before = len(_values) - _values = _values[_outliers:-_outliers] - print "culling %d -> %d" % (before, len(_values)) - _min = 99999999 - _max = 0 - _total = 0.0 - for value in _values: - _min = min(_min, value) - _max = max(_max, value) - _total += float(value) - _avg = float(_total) / float(len(_values)) - _fmin = dt.sec_to_str(_min) - _fmax = dt.sec_to_str(_max) - _favg = dt.sec_to_str(_avg) - p.add_row([operation, image, _fmin, _fmax, _favg, count, - failure_count, _failure_percentage]) -print p +if __name__ == '__main__': + parser = argparse.ArgumentParser('StackTach Nova Usage Summary Report') + parser.add_argument('--utcdate', help='Report start date YYYY-MM-DD. Default yesterday midnight.', + type=valid_date, default=None) + parser.add_argument('--hours', help='Report span in hours. Default: 24', default=24, type=int) + parser.add_argument('--start_hour', help='Starting hour 0-23. Default: 0', default=0, type=int) + parser.add_argument('--percentile', help='Percentile for timings. Default: 90', default=90, type=int) + parser.add_argument('--store', help='Store report in database. Default: False', default=False, + action="store_true") + parser.add_argument('--silent', help="Do not show summary report. Default: False", default=False, + action="store_true") + args = parser.parse_args() -print "Total: %d, Failures: %d, Failure Rate: %.1f%%" % \ - (total, failure_total, - (float(failure_total)/float(total)) * 100.0) + yesterday = args.utcdate + percentile = args.percentile + hours = args.hours + start_hour = args.start_hour + + print args + sys.exit(1) + raw_report = make_report(yesterday, start_hour, hours, percentile, args['store']) + + if not args.show: + sys.exit(1) + + details = raw_report[0] + percentile = details['percentile'] + pct = details['pct'] + start = details['start'] + end = details['end'] + print "Report for %s to %s" % (start, end) + + cols = raw_report[1] + + # Print the results ... + p = prettytable.PrettyTable(cols) + for c in cols[2:]: + p.align[c] = 'r' + p.sortby = cols[0] + + print "* Using %d-th percentile for results (+/-%.1f%% cut)" % \ + (percentile, pct * 100.0) + for row in raw_report[2:]: + p.add_row(row) + print p + + total = details['total'] + failure_total = details['failure_total'] + print "Total: %d, Failures: %d, Failure Rate: %.1f%%" % \ + (total, failure_total, + (float(failure_total)/float(total)) * 100.0) diff --git a/settings.py b/settings.py index 7233fcf..cf72d7c 100644 --- a/settings.py +++ b/settings.py @@ -19,7 +19,7 @@ except ImportError: db_password = os.environ['STACKTACH_DB_PASSWORD'] install_dir = os.environ['STACKTACH_INSTALL_DIR'] -DEBUG = False +DEBUG = True TEMPLATE_DEBUG = DEBUG ADMINS = ( diff --git a/stacktach/models.py b/stacktach/models.py index 9a09059..844737f 100644 --- a/stacktach/models.py +++ b/stacktach/models.py @@ -154,3 +154,14 @@ class RequestTracker(models.Model): # Not used ... but soon hopefully. completed = models.BooleanField(default=False, db_index=True) + + +class JsonReport(models.Model): + """Stores cron-job reports in raw json format for extraction + via stacky/rest. All DateTimes are UTC.""" + period_start = models.DateTimeField(db_index=True) + period_end = models.DateTimeField(db_index=True) + created = models.DateTimeField(db_index=True) + name = models.CharField(max_length=50, db_index=True) + version = models.IntegerField(default=1) + json = models.TextField() From 1968fe08998dde0a61ec452a1f5ee65cf46faaaa Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Mon, 18 Feb 2013 11:34:14 -0400 Subject: [PATCH 02/13] migration 004 --- migrations/004_usage.sql | 86 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 migrations/004_usage.sql diff --git a/migrations/004_usage.sql b/migrations/004_usage.sql new file mode 100644 index 0000000..118b365 --- /dev/null +++ b/migrations/004_usage.sql @@ -0,0 +1,86 @@ +BEGIN; +CREATE TABLE `stacktach_deployment` ( + `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY, + `name` varchar(50) NOT NULL +) +; +CREATE TABLE `stacktach_rawdata` ( + `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY, + `deployment_id` integer NOT NULL, + `tenant` varchar(50), + `json` longtext NOT NULL, + `routing_key` varchar(50), + `state` varchar(20), + `old_state` varchar(20), + `old_task` varchar(30), + `task` varchar(30), + `image_type` integer, + `when` numeric(20, 6) NOT NULL, + `publisher` varchar(100), + `event` varchar(50), + `service` varchar(50), + `host` varchar(100), + `instance` varchar(50), + `request_id` varchar(50) +) +; +ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`); +CREATE TABLE `stacktach_lifecycle` ( + `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY, + `instance` varchar(50), + `last_state` varchar(50), + `last_task_state` varchar(50), + `last_raw_id` integer +) +; +ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_2a04e82d` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`); +CREATE TABLE `stacktach_instanceusage` ( + `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY, + `instance` varchar(50), + `launched_at` numeric(20, 6), + `deleted_at` numeric(20, 6), + `request_id` varchar(50), + `instance_type_id` varchar(50) +) +; +CREATE TABLE `stacktach_instanceexists` ( + `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY, + `instance` varchar(50), + `launched_at` numeric(20, 6), + `deleted_at` numeric(20, 6), + `message_id` varchar(50), + `instance_type_id` varchar(50), + `status` varchar(50) NOT NULL, + `raw_id` integer, + `usage_id` integer +) +; +ALTER TABLE `stacktach_instanceexists` ADD CONSTRAINT `usage_id_refs_id_3b13299b` FOREIGN KEY (`usage_id`) REFERENCES `stacktach_instanceusage` (`id`); +ALTER TABLE `stacktach_instanceexists` ADD CONSTRAINT `raw_id_refs_id_65c72953` FOREIGN KEY (`raw_id`) REFERENCES `stacktach_rawdata` (`id`); +CREATE TABLE `stacktach_timing` ( + `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY, + `name` varchar(50) NOT NULL, + `lifecycle_id` integer NOT NULL, + `start_raw_id` integer, + `end_raw_id` integer, + `start_when` numeric(20, 6), + `end_when` numeric(20, 6), + `diff` numeric(20, 6) +) +; +ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_3cd201fc` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`); +ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_3cd201fc` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`); +ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`); +CREATE TABLE `stacktach_requesttracker` ( + `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY, + `request_id` varchar(50) NOT NULL, + `lifecycle_id` integer NOT NULL, + `last_timing_id` integer, + `start` numeric(20, 6) NOT NULL, + `duration` numeric(20, 6) NOT NULL, + `completed` bool NOT NULL +) +; +ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f7d8336` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`); +ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`); +COMMIT; From 174348ed482f017a7b51fe833120c0945e0fa6b1 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Mon, 18 Feb 2013 19:54:24 -0400 Subject: [PATCH 03/13] Stored report support. The "pretty" report can now be stored in the stacktach database and retrieved via REST or stacky. Also better command line support for the "pretty" report. --- migrations/005_reports.txt | 3 ++ reports/pretty.py | 99 +++++++++++++++++++++++--------------- settings.py | 2 +- stacktach/models.py | 2 +- stacktach/stacky_server.py | 28 +++++++++++ stacktach/urls.py | 3 ++ 6 files changed, 96 insertions(+), 41 deletions(-) create mode 100644 migrations/005_reports.txt diff --git a/migrations/005_reports.txt b/migrations/005_reports.txt new file mode 100644 index 0000000..e10220f --- /dev/null +++ b/migrations/005_reports.txt @@ -0,0 +1,3 @@ +do +python manage.py syncdb +to add the JsonReport table diff --git a/reports/pretty.py b/reports/pretty.py index 1f19ae9..e08c6c0 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -7,23 +7,24 @@ import time import prettytable sys.path.append("/stacktach") -sys.path.append(".") from stacktach import datetime_to_decimal as dt from stacktach import image_type from stacktach import models -def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, store=False): +def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, + store=False): if not yesterday: - yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1) + yesterday = datetime.datetime.utcnow().date() - \ + datetime.timedelta(days=1) - start = datetime.datetime(year=yesterday.year, month=yesterday.month, - day=yesterday.day, hour=start_hour) - end = start + datetime.timedelta(hours=hours-1, minutes=59, seconds=59) + rstart = datetime.datetime(year=yesterday.year, month=yesterday.month, + day=yesterday.day, hour=start_hour) + rend = rstart + datetime.timedelta(hours=hours-1, minutes=59, seconds=59) - dstart = dt.dt_to_decimal(start) - dend = dt.dt_to_decimal(end) + dstart = dt.dt_to_decimal(rstart) + dend = dt.dt_to_decimal(rend) codes = {} @@ -52,8 +53,8 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, store=Fal report = False req = req_dict['request_id'] raws = models.RawData.objects.filter(request_id=req)\ - .exclude(event='compute.instance.exists')\ - .order_by('when') + .exclude(event='compute.instance.exists')\ + .order_by('when') start = None err = None @@ -74,7 +75,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, store=Fal break if raw.image_type: - image_type_num |= raw.image_type + image_type_num |= raw.image_type image = "?" if image_type.isset(image_type_num, image_type.BASE_IMAGE): @@ -106,8 +107,8 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, store=Fal # Summarize the results ... report = [] pct = (float(100 - percentile) / 2.0) / 100.0 - details = {'percentile': percentile, 'pct': pct, 'hours': hours, - 'start': start, 'end': end} + details = {'percentile': percentile, 'pct': pct, 'hours': hours, + 'start': float(dstart), 'end': float(dend)} report.append(details) cols = ["Operation", "Image", "Min*", "Max*", "Avg*", @@ -143,13 +144,13 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, store=Fal _fmax = dt.sec_to_str(_max) _favg = dt.sec_to_str(_avg) - report.add_row([operation, image, _fmin, _fmax, _favg, count, - failure_count, failure_percentage]) + report.append([operation, image, _fmin, _fmax, _favg, count, + failure_count, failure_percentage]) details['total'] = total - details['failures'] = failures + details['failure_total'] = failure_total details['failure_rate'] = (float(failure_total)/float(total)) * 100.0 - return report + return (rstart, rend, report) def valid_date(date): @@ -157,39 +158,57 @@ def valid_date(date): t = time.strptime(date, "%Y-%m-%d") return datetime.datetime(*t[:6]) except Exception, e: - raise argparse.ArgumentTypeError("'%s' is not in YYYY-MM-DD format." % date) + raise argparse.ArgumentTypeError( + "'%s' is not in YYYY-MM-DD format." % date) if __name__ == '__main__': parser = argparse.ArgumentParser('StackTach Nova Usage Summary Report') - parser.add_argument('--utcdate', help='Report start date YYYY-MM-DD. Default yesterday midnight.', - type=valid_date, default=None) - parser.add_argument('--hours', help='Report span in hours. Default: 24', default=24, type=int) - parser.add_argument('--start_hour', help='Starting hour 0-23. Default: 0', default=0, type=int) - parser.add_argument('--percentile', help='Percentile for timings. Default: 90', default=90, type=int) - parser.add_argument('--store', help='Store report in database. Default: False', default=False, - action="store_true") - parser.add_argument('--silent', help="Do not show summary report. Default: False", default=False, - action="store_true") + parser.add_argument('--utcdate', + help='Report start date YYYY-MM-DD. Default yesterday midnight.', + type=valid_date, default=None) + parser.add_argument('--hours', + help='Report span in hours. Default: 24', default=24, + type=int) + parser.add_argument('--start_hour', + help='Starting hour 0-23. Default: 0', default=0, + type=int) + parser.add_argument('--percentile', + help='Percentile for timings. Default: 90', default=90, + type=int) + parser.add_argument('--store', + help='Store report in database. Default: False', + default=False, action="store_true") + parser.add_argument('--silent', + help="Do not show summary report. Default: False", + default=False, action="store_true") args = parser.parse_args() yesterday = args.utcdate percentile = args.percentile hours = args.hours start_hour = args.start_hour + store_report = args.store - print args - sys.exit(1) - raw_report = make_report(yesterday, start_hour, hours, percentile, args['store']) + start, end, raw_report = make_report(yesterday, start_hour, hours, + percentile, store_report) + details = raw_report[0] + pct = details['pct'] - if not args.show: + if store_report: + values = {'json': json.dumps(raw_report), + 'created': float(dt.dt_to_decimal(datetime.datetime.utcnow())), + 'period_start': start, + 'period_end': end, + 'version': 1, + 'name': 'summary report'} + report = models.JsonReport(**values) + report.save() + print "Report stored (id=%d)" % report.id + + if args.silent: sys.exit(1) - details = raw_report[0] - percentile = details['percentile'] - pct = details['pct'] - start = details['start'] - end = details['end'] print "Report for %s to %s" % (start, end) cols = raw_report[1] @@ -203,11 +222,13 @@ if __name__ == '__main__': print "* Using %d-th percentile for results (+/-%.1f%% cut)" % \ (percentile, pct * 100.0) for row in raw_report[2:]: - p.add_row(row) + frow = row[:] + frow[-1] = "%.1f%%" % (row[-1] * 100.0) + p.add_row(frow) print p total = details['total'] failure_total = details['failure_total'] + failure_rate = details['failure_rate'] print "Total: %d, Failures: %d, Failure Rate: %.1f%%" % \ - (total, failure_total, - (float(failure_total)/float(total)) * 100.0) + (total, failure_total, failure_rate) diff --git a/settings.py b/settings.py index cf72d7c..7233fcf 100644 --- a/settings.py +++ b/settings.py @@ -19,7 +19,7 @@ except ImportError: db_password = os.environ['STACKTACH_DB_PASSWORD'] install_dir = os.environ['STACKTACH_INSTALL_DIR'] -DEBUG = True +DEBUG = False TEMPLATE_DEBUG = DEBUG ADMINS = ( diff --git a/stacktach/models.py b/stacktach/models.py index 844737f..a8dae21 100644 --- a/stacktach/models.py +++ b/stacktach/models.py @@ -161,7 +161,7 @@ class JsonReport(models.Model): via stacky/rest. All DateTimes are UTC.""" period_start = models.DateTimeField(db_index=True) period_end = models.DateTimeField(db_index=True) - created = models.DateTimeField(db_index=True) + created = models.DecimalField(max_digits=20, decimal_places=6, db_index=True) name = models.CharField(max_length=50, db_index=True) version = models.IntegerField(default=1) json = models.TextField() diff --git a/stacktach/stacky_server.py b/stacktach/stacky_server.py index 994729c..6f47e0f 100644 --- a/stacktach/stacky_server.py +++ b/stacktach/stacky_server.py @@ -4,6 +4,7 @@ import json from django.db.models import Q from django.http import HttpResponse +from django.shortcuts import get_object_or_404 import datetime_to_decimal as dt import models @@ -375,3 +376,30 @@ def do_list_usage_exists(request): exist.status]) return rsp(results) + + +def do_jsonreports(request): + yesterday = datetime.datetime.utcnow() - datetime.timedelta(days=1) + now = datetime.datetime.utcnow() + yesterday = dt.dt_to_decimal(yesterday) + now = dt.dt_to_decimal(now) + _from = float(request.GET.get('created_from', yesterday)) + _to = float(request.GET.get('created_to', now)) + reports = models.JsonReport.objects.filter(created__gte=_from, + created__lte=_to) + results = [] + results.append(['Id', 'Start', 'End', 'Created', 'Name', 'Version']) + for report in reports: + results.append([report.id, + float(dt.dt_to_decimal(report.period_start)), + float(dt.dt_to_decimal(report.period_end)), + float(report.created), + report.name, + report.version]) + return rsp(results) + + +def do_jsonreport(request, report_id): + report_id = int(report_id) + report = get_object_or_404(models.JsonReport, pk=report_id) + return rsp(report.json) diff --git a/stacktach/urls.py b/stacktach/urls.py index bfa6678..240e1ee 100644 --- a/stacktach/urls.py +++ b/stacktach/urls.py @@ -12,6 +12,9 @@ urlpatterns = patterns('', url(r'stacky/timings/uuid/$', 'stacktach.stacky_server.do_timings_uuid'), url(r'stacky/summary/$', 'stacktach.stacky_server.do_summary'), url(r'stacky/request/$', 'stacktach.stacky_server.do_request'), + url(r'stacky/reports/$', 'stacktach.stacky_server.do_jsonreports'), + url(r'stacky/report/(?P\d+)/$', + 'stacktach.stacky_server.do_jsonreport'), url(r'stacky/show/(?P\d+)/$', 'stacktach.stacky_server.do_show'), url(r'stacky/watch/(?P\d+)/$', From b485c148f51c723b73125d93924d82b8f450e81f Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Tue, 19 Feb 2013 07:43:04 -0600 Subject: [PATCH 04/13] local date changes --- stacktach/views.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/stacktach/views.py b/stacktach/views.py index d770645..08f6d6d 100644 --- a/stacktach/views.py +++ b/stacktach/views.py @@ -321,17 +321,25 @@ def aggregate_usage(raw): def str_time_to_unix(when): - try: - when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S") - except ValueError: + if 'T' in when: + try: + # Old way of doing it + when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f") + except ValueError: + try: + # Old way of doing it, no millis + when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S") + except Exception, e: + print "BAD DATE: ", e + else: try: when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S.%f") except ValueError: try: - # Old way of doing it - when = datetime.datetime.strptime(when, "%Y-%m-%dT%H:%M:%S.%f") + when = datetime.datetime.strptime(when, "%Y-%m-%d %H:%M:%S") except Exception, e: print "BAD DATE: ", e + return dt.dt_to_decimal(when) From baa46abddbfc21aaf5f8f1f65151962b025e59d6 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Tue, 19 Feb 2013 09:59:51 -0400 Subject: [PATCH 05/13] Missing argparse in piprequires --- etc/pip-requires.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/etc/pip-requires.txt b/etc/pip-requires.txt index 2676d61..c88953c 100644 --- a/etc/pip-requires.txt +++ b/etc/pip-requires.txt @@ -3,3 +3,4 @@ MySQL-python>=1.2.3 eventlet>=0.9.17 kombu>=2.4.7 librabbitmq>=1.0.0 +argparse From ada9886a4d3fbd13b576da92776251b593e1dd68 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Tue, 19 Feb 2013 10:21:47 -0400 Subject: [PATCH 06/13] No float conversion for 2.6 --- reports/pretty.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/reports/pretty.py b/reports/pretty.py index e08c6c0..29990e1 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -197,7 +197,7 @@ if __name__ == '__main__': if store_report: values = {'json': json.dumps(raw_report), - 'created': float(dt.dt_to_decimal(datetime.datetime.utcnow())), + 'created': dt.dt_to_decimal(datetime.datetime.utcnow()), 'period_start': start, 'period_end': end, 'version': 1, From 2f839152e2cf4ef8126a0071d2156b1c2ad7df29 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Tue, 19 Feb 2013 10:34:35 -0400 Subject: [PATCH 07/13] REST reports command works with 2.6 --- stacktach/stacky_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stacktach/stacky_server.py b/stacktach/stacky_server.py index 6f47e0f..5543aa3 100644 --- a/stacktach/stacky_server.py +++ b/stacktach/stacky_server.py @@ -383,8 +383,8 @@ def do_jsonreports(request): now = datetime.datetime.utcnow() yesterday = dt.dt_to_decimal(yesterday) now = dt.dt_to_decimal(now) - _from = float(request.GET.get('created_from', yesterday)) - _to = float(request.GET.get('created_to', now)) + _from = request.GET.get('created_from', yesterday) + _to = request.GET.get('created_to', now) reports = models.JsonReport.objects.filter(created__gte=_from, created__lte=_to) results = [] From 1eae8c45314f4094efcc0a9f0059da490dee3f62 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Tue, 19 Feb 2013 12:26:07 -0400 Subject: [PATCH 08/13] Reports by region --- reports/pretty.py | 36 ++++++++++++++++++++++++++++++------ stacktach/models.py | 3 +++ 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/reports/pretty.py b/reports/pretty.py index 29990e1..c880408 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -14,7 +14,7 @@ from stacktach import models def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, - store=False): + store=False, region=None): if not yesterday: yesterday = datetime.datetime.utcnow().date() - \ datetime.timedelta(days=1) @@ -28,9 +28,25 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, codes = {} + cells = [] + regions = [] + if region: + region = region.upper() + deployments = models.Deployment.objects.all() + for deployment in deployments: + name = deployment.name.upper() + if not region or region in name: + regions.append(deployment.id) + cells.append(deployment.name) + + if not len(regions): + print "No regions found for '%s'" % region + sys.exit(1) + # Get all the instances that have changed in the last N hours ... updates = models.RawData.objects.filter(event='compute.instance.update', - when__gt=dstart, when__lte=dend)\ + when__gt=dstart, when__lte=dend, + deployment__in=regions)\ .values('instance').distinct() expiry = 60 * 60 # 1 hour @@ -108,7 +124,8 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, report = [] pct = (float(100 - percentile) / 2.0) / 100.0 details = {'percentile': percentile, 'pct': pct, 'hours': hours, - 'start': float(dstart), 'end': float(dend)} + 'start': float(dstart), 'end': float(dend), 'region': region, + 'cells': cells} report.append(details) cols = ["Operation", "Image", "Min*", "Max*", "Avg*", @@ -167,6 +184,8 @@ if __name__ == '__main__': parser.add_argument('--utcdate', help='Report start date YYYY-MM-DD. Default yesterday midnight.', type=valid_date, default=None) + parser.add_argument('--region', + help='Report Region. Default is all regions.', default=None) parser.add_argument('--hours', help='Report span in hours. Default: 24', default=24, type=int) @@ -189,19 +208,24 @@ if __name__ == '__main__': hours = args.hours start_hour = args.start_hour store_report = args.store + region = args.region start, end, raw_report = make_report(yesterday, start_hour, hours, - percentile, store_report) + percentile, store_report, region) details = raw_report[0] pct = details['pct'] + region_name = "all" + if region: + region_name = region + if store_report: values = {'json': json.dumps(raw_report), 'created': dt.dt_to_decimal(datetime.datetime.utcnow()), 'period_start': start, 'period_end': end, 'version': 1, - 'name': 'summary report'} + 'name': 'summary for region: %s' % region_name} report = models.JsonReport(**values) report.save() print "Report stored (id=%d)" % report.id @@ -209,7 +233,7 @@ if __name__ == '__main__': if args.silent: sys.exit(1) - print "Report for %s to %s" % (start, end) + print "'%s' Report for %s to %s" % (region_name, start, end) cols = raw_report[1] diff --git a/stacktach/models.py b/stacktach/models.py index a8dae21..b405194 100644 --- a/stacktach/models.py +++ b/stacktach/models.py @@ -20,6 +20,9 @@ from django.db import models class Deployment(models.Model): name = models.CharField(max_length=50) + def __repr__(self): + return self.name + class RawData(models.Model): deployment = models.ForeignKey(Deployment) From 1ac5678d156b1d2939cd41cd9cd4b4dd4cd92faf Mon Sep 17 00:00:00 2001 From: root Date: Tue, 19 Feb 2013 10:30:50 -0600 Subject: [PATCH 09/13] local changes --- reports/pretty.py | 2 +- stacktach/stacky_server.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/reports/pretty.py b/reports/pretty.py index e08c6c0..29990e1 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -197,7 +197,7 @@ if __name__ == '__main__': if store_report: values = {'json': json.dumps(raw_report), - 'created': float(dt.dt_to_decimal(datetime.datetime.utcnow())), + 'created': dt.dt_to_decimal(datetime.datetime.utcnow()), 'period_start': start, 'period_end': end, 'version': 1, diff --git a/stacktach/stacky_server.py b/stacktach/stacky_server.py index 6f47e0f..5543aa3 100644 --- a/stacktach/stacky_server.py +++ b/stacktach/stacky_server.py @@ -383,8 +383,8 @@ def do_jsonreports(request): now = datetime.datetime.utcnow() yesterday = dt.dt_to_decimal(yesterday) now = dt.dt_to_decimal(now) - _from = float(request.GET.get('created_from', yesterday)) - _to = float(request.GET.get('created_to', now)) + _from = request.GET.get('created_from', yesterday) + _to = request.GET.get('created_to', now) reports = models.JsonReport.objects.filter(created__gte=_from, created__lte=_to) results = [] From 65c900afe10162177eb6fbba05ebf9e6d6cf6971 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Tue, 19 Feb 2013 16:50:04 -0400 Subject: [PATCH 10/13] error breakdown columns --- reports/pretty.py | 75 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 19 deletions(-) diff --git a/reports/pretty.py b/reports/pretty.py index c880408..73060f8 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -52,7 +52,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, expiry = 60 * 60 # 1 hour cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot'] - failures = {} + failures = {} # { key : {failure_type: count} } durations = {} attempts = {} @@ -66,7 +66,6 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, for req_dict in reqs: - report = False req = req_dict['request_id'] raws = models.RawData.objects.filter(request_id=req)\ .exclude(event='compute.instance.exists')\ @@ -74,6 +73,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, start = None err = None + failure_type = None operation = "aux" image_type_num = 0 @@ -83,7 +83,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, start = raw.when if 'error' in raw.routing_key: err = raw - report = True + failure_type = 'http' for cmd in cmds: if cmd in raw.event: @@ -106,7 +106,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, diff = end - start if diff > 3600: - report = True + failure_type = '> 60' key = (operation, image) @@ -117,8 +117,20 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, attempts[key] = attempts.get(key, 0) + 1 - if report: - failures[key] = failures.get(key, 0) + 1 + if failure_type: + if err: + queue, body = json.loads(err.json) + payload = body['payload'] + exc = payload.get('exception') + if exc: + code = int(exc.get('kwargs', {}).get('code', 0)) + if code >= 400 and code < 500: + failure_type = "4xx" + if code >= 500 and code < 600: + failure_type = "5xx" + breakdown = failures.get(key, {}) + breakdown[failure_type] = breakdown.get(failure_type, 0) + 1 + failures[key] = breakdown # Summarize the results ... report = [] @@ -128,19 +140,32 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, 'cells': cells} report.append(details) + failure_types = ["4xx", "5xx", "> 60"] cols = ["Operation", "Image", "Min*", "Max*", "Avg*", - "Requests", "# Fail", "Fail %"] + "Requests"] + for failure_type in failure_types: + cols.append("%s" % failure_type) + cols.append("%% %s" % failure_type) report.append(cols) total = 0 - failure_total = 0 + failure_totals = {} for key, count in attempts.iteritems(): total += count operation, image = key - failure_count = failures.get(key, 0) - failure_total += failure_count - failure_percentage = float(failure_count) / float(count) + breakdown = failures.get(key, {}) + this_failure_pair = [] + for failure_type in failure_types: + # Failure counts for this attempt. + # Sum for grand totals. + failure_count = breakdown.get(failure_type, 0) + failure_totals[failure_type] = \ + failure_totals.get(failure_type, 0) + failure_count + + # Failure percentage for this attempt. + percentage = float(failure_count) / float(count) + this_failure_pair.append((failure_count, percentage)) # N-th % of durations ... _values = durations[key] @@ -161,12 +186,23 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, _fmax = dt.sec_to_str(_max) _favg = dt.sec_to_str(_avg) - report.append([operation, image, _fmin, _fmax, _favg, count, - failure_count, failure_percentage]) + row = [operation, image, _fmin, _fmax, _favg, count] + for failure_count, failure_percentage in this_failure_pair: + row.append(failure_count) + row.append(failure_percentage) + report.append(row) details['total'] = total - details['failure_total'] = failure_total - details['failure_rate'] = (float(failure_total)/float(total)) * 100.0 + failure_grand_total = 0 + for failure_type in failure_types: + failure_total = failure_totals.get(failure_type, 0) + failure_grand_total += failure_total + details["%s failure count" % failure_type] = failure_total + failure_percentage = (float(failure_total)/float(total)) * 100.0 + details["%s failure percentage" % failure_type] = failure_percentage + + details['failure_grand_total'] = failure_grand_total + details['failure_grand_rate'] = (float(failure_grand_total)/float(total)) * 100.0 return (rstart, rend, report) @@ -224,7 +260,7 @@ if __name__ == '__main__': 'created': dt.dt_to_decimal(datetime.datetime.utcnow()), 'period_start': start, 'period_end': end, - 'version': 1, + 'version': 2, 'name': 'summary for region: %s' % region_name} report = models.JsonReport(**values) report.save() @@ -247,12 +283,13 @@ if __name__ == '__main__': (percentile, pct * 100.0) for row in raw_report[2:]: frow = row[:] - frow[-1] = "%.1f%%" % (row[-1] * 100.0) + for col in [7, 9, 11]: + frow[col] = "%.1f%%" % (row[col] * 100.0) p.add_row(frow) print p total = details['total'] - failure_total = details['failure_total'] - failure_rate = details['failure_rate'] + failure_total = details['failure_grand_total'] + failure_rate = details['failure_grand_rate'] print "Total: %d, Failures: %d, Failure Rate: %.1f%%" % \ (total, failure_total, failure_rate) From 8f970c7dca8b271ffcdb4e851f57de77c376e730 Mon Sep 17 00:00:00 2001 From: mendeni Date: Thu, 21 Feb 2013 09:50:27 -0500 Subject: [PATCH 11/13] Use system time zone --- settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/settings.py b/settings.py index 7233fcf..01da733 100644 --- a/settings.py +++ b/settings.py @@ -46,7 +46,7 @@ DATABASES = { # timezone as the operating system. # If running in a Windows environment this must be set to the same as your # system time zone. -TIME_ZONE = 'America/Chicago' +TIME_ZONE = None # Language code for this installation. All choices can be found here: # http://www.i18nguy.com/unicode/language-identifiers.html From 6eb284c7e7936d24fbabbd7e58f178c630f901d0 Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Mon, 25 Feb 2013 16:39:32 +0000 Subject: [PATCH 12/13] Include transitions to error state --- reports/pretty.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/reports/pretty.py b/reports/pretty.py index 73060f8..0857f2a 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -81,10 +81,18 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, for raw in raws: if not start: start = raw.when + if 'error' in raw.routing_key: err = raw failure_type = 'http' + if raw.old_state != 'error' and raw.state == 'error': + failure_type = 'state' + + if raw.old_state == 'error' and \ + (not raw.state in ['deleted', 'error']): + failure_type = None + for cmd in cmds: if cmd in raw.event: operation = cmd @@ -140,7 +148,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, 'cells': cells} report.append(details) - failure_types = ["4xx", "5xx", "> 60"] + failure_types = ["4xx", "5xx", "> 60", "state"] cols = ["Operation", "Image", "Min*", "Max*", "Avg*", "Requests"] for failure_type in failure_types: From 0c7cc646225e02e4ab743433c995f5414a1d673c Mon Sep 17 00:00:00 2001 From: Sandy Walsh Date: Mon, 25 Feb 2013 18:41:44 +0000 Subject: [PATCH 13/13] 97 percentile and median --- reports/pretty.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/reports/pretty.py b/reports/pretty.py index 0857f2a..f3227e5 100644 --- a/reports/pretty.py +++ b/reports/pretty.py @@ -13,7 +13,7 @@ from stacktach import image_type from stacktach import models -def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, +def make_report(yesterday=None, start_hour=0, hours=24, percentile=97, store=False, region=None): if not yesterday: yesterday = datetime.datetime.utcnow().date() - \ @@ -149,7 +149,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, report.append(details) failure_types = ["4xx", "5xx", "> 60", "state"] - cols = ["Operation", "Image", "Min*", "Max*", "Avg*", + cols = ["Operation", "Image", "Min", "Max", "Med", "%d%%" % percentile, "Requests"] for failure_type in failure_types: cols.append("%s" % failure_type) @@ -178,10 +178,6 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, # N-th % of durations ... _values = durations[key] _values.sort() - _outliers = int(float(len(_values)) * pct) - if _outliers > 0: - before = len(_values) - _values = _values[_outliers:-_outliers] _min = 99999999 _max = 0 _total = 0.0 @@ -189,12 +185,20 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, _min = min(_min, value) _max = max(_max, value) _total += float(value) - _avg = float(_total) / float(len(_values)) + _num = len(_values) + _avg = float(_total) / float(_num) + half = _num / 2 + _median = _values[half] + _percentile_index = int((float(percentile) / 100.0) * float(_num)) + _percentile = _values[_percentile_index] + _fmin = dt.sec_to_str(_min) _fmax = dt.sec_to_str(_max) _favg = dt.sec_to_str(_avg) + _fmedian = dt.sec_to_str(_median) + _fpercentile = dt.sec_to_str(_percentile) - row = [operation, image, _fmin, _fmax, _favg, count] + row = [operation, image, _fmin, _fmax, _fmedian, _fpercentile, count] for failure_count, failure_percentage in this_failure_pair: row.append(failure_count) row.append(failure_percentage) @@ -237,7 +241,7 @@ if __name__ == '__main__': help='Starting hour 0-23. Default: 0', default=0, type=int) parser.add_argument('--percentile', - help='Percentile for timings. Default: 90', default=90, + help='Percentile for timings. Default: 97', default=97, type=int) parser.add_argument('--store', help='Store report in database. Default: False', @@ -268,7 +272,7 @@ if __name__ == '__main__': 'created': dt.dt_to_decimal(datetime.datetime.utcnow()), 'period_start': start, 'period_end': end, - 'version': 2, + 'version': 3, 'name': 'summary for region: %s' % region_name} report = models.JsonReport(**values) report.save() @@ -287,11 +291,9 @@ if __name__ == '__main__': p.align[c] = 'r' p.sortby = cols[0] - print "* Using %d-th percentile for results (+/-%.1f%% cut)" % \ - (percentile, pct * 100.0) for row in raw_report[2:]: frow = row[:] - for col in [7, 9, 11]: + for col in [8, 10, 12, 14]: frow[col] = "%.1f%%" % (row[col] * 100.0) p.add_row(frow) print p