diff --git a/migrations/002_delta.sql b/migrations/002_delta.sql
new file mode 100644
index 0000000..eb89393
--- /dev/null
+++ b/migrations/002_delta.sql
@@ -0,0 +1,2 @@
+ALTER TABLE stacktach_rawdata ADD task VARCHAR(30);
+CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
diff --git a/migrations/003_delta.sql b/migrations/003_delta.sql
new file mode 100644
index 0000000..8c949bf
--- /dev/null
+++ b/migrations/003_delta.sql
@@ -0,0 +1,2 @@
+ALTER TABLE stacktach_rawdata ADD image_type integer;
+CREATE INDEX `stacktach_rawdata_cfde77eb` ON `stacktach_rawdata` (`image_type`);
diff --git a/migrations/003_image_type.sql b/migrations/003_image_type.sql
new file mode 100644
index 0000000..133d383
--- /dev/null
+++ b/migrations/003_image_type.sql
@@ -0,0 +1,63 @@
+BEGIN;
+CREATE TABLE `stacktach_deployment` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL
+)
+;
+CREATE TABLE `stacktach_rawdata` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `deployment_id` integer NOT NULL,
+    `tenant` varchar(50),
+    `json` longtext NOT NULL,
+    `routing_key` varchar(50),
+    `state` varchar(20),
+    `old_state` varchar(20),
+    `old_task` varchar(30),
+    `task` varchar(30),
+    `image_type` integer,
+    `when` numeric(20, 6) NOT NULL,
+    `publisher` varchar(100),
+    `event` varchar(50),
+    `service` varchar(50),
+    `host` varchar(100),
+    `instance` varchar(50),
+    `request_id` varchar(50)
+)
+;
+ALTER TABLE `stacktach_rawdata` ADD CONSTRAINT `deployment_id_refs_id_362370d` FOREIGN KEY (`deployment_id`) REFERENCES `stacktach_deployment` (`id`);
+CREATE TABLE `stacktach_lifecycle` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `instance` varchar(50),
+    `last_state` varchar(50),
+    `last_task_state` varchar(50),
+    `last_raw_id` integer
+)
+;
+ALTER TABLE `stacktach_lifecycle` ADD CONSTRAINT `last_raw_id_refs_id_d5fb17d3` FOREIGN KEY (`last_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_timing` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `name` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `start_raw_id` integer,
+    `end_raw_id` integer,
+    `start_when` numeric(20, 6),
+    `end_when` numeric(20, 6),
+    `diff` numeric(20, 6)
+)
+;
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `lifecycle_id_refs_id_4255ead8` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `start_raw_id_refs_id_c32dfe04` FOREIGN KEY (`start_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+ALTER TABLE `stacktach_timing` ADD CONSTRAINT `end_raw_id_refs_id_c32dfe04` FOREIGN KEY (`end_raw_id`) REFERENCES `stacktach_rawdata` (`id`);
+CREATE TABLE `stacktach_requesttracker` (
+    `id` integer AUTO_INCREMENT NOT NULL PRIMARY KEY,
+    `request_id` varchar(50) NOT NULL,
+    `lifecycle_id` integer NOT NULL,
+    `last_timing_id` integer,
+    `start` numeric(20, 6) NOT NULL,
+    `duration` numeric(20, 6) NOT NULL,
+    `completed` bool NOT NULL
+)
+;
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `lifecycle_id_refs_id_e457729` FOREIGN KEY (`lifecycle_id`) REFERENCES `stacktach_lifecycle` (`id`);
+ALTER TABLE `stacktach_requesttracker` ADD CONSTRAINT `last_timing_id_refs_id_f0827cca` FOREIGN KEY (`last_timing_id`) REFERENCES `stacktach_timing` (`id`);
+COMMIT;
diff --git a/migrations/003_image_type_indexes.sql b/migrations/003_image_type_indexes.sql
new file mode 100644
index 0000000..ada6b63
--- /dev/null
+++ b/migrations/003_image_type_indexes.sql
@@ -0,0 +1,32 @@
+BEGIN;
+CREATE INDEX `stacktach_rawdata_4ac6801` ON `stacktach_rawdata` (`deployment_id`);
+CREATE INDEX `stacktach_rawdata_2207f86d` ON `stacktach_rawdata` (`tenant`);
+CREATE INDEX `stacktach_rawdata_2192f43a` ON `stacktach_rawdata` (`routing_key`);
+CREATE INDEX `stacktach_rawdata_355bfc27` ON `stacktach_rawdata` (`state`);
+CREATE INDEX `stacktach_rawdata_b716e0bb` ON `stacktach_rawdata` (`old_state`);
+CREATE INDEX `stacktach_rawdata_8182be12` ON `stacktach_rawdata` (`old_task`);
+CREATE INDEX `stacktach_rawdata_1c149b74` ON `stacktach_rawdata` (`task`);
+CREATE INDEX `stacktach_rawdata_cfde77eb` ON `stacktach_rawdata` (`image_type`);
+CREATE INDEX `stacktach_rawdata_feaed089` ON `stacktach_rawdata` (`when`);
+CREATE INDEX `stacktach_rawdata_878a2906` ON `stacktach_rawdata` (`publisher`);
+CREATE INDEX `stacktach_rawdata_a90f9116` ON `stacktach_rawdata` (`event`);
+CREATE INDEX `stacktach_rawdata_52c5ef6b` ON `stacktach_rawdata` (`service`);
+CREATE INDEX `stacktach_rawdata_38dbea87` ON `stacktach_rawdata` (`host`);
+CREATE INDEX `stacktach_rawdata_888b756a` ON `stacktach_rawdata` (`instance`);
+CREATE INDEX `stacktach_rawdata_792812e8` ON `stacktach_rawdata` (`request_id`);
+CREATE INDEX `stacktach_lifecycle_888b756a` ON `stacktach_lifecycle` (`instance`);
+CREATE INDEX `stacktach_lifecycle_9b2555fd` ON `stacktach_lifecycle` (`last_state`);
+CREATE INDEX `stacktach_lifecycle_67421a0e` ON `stacktach_lifecycle` (`last_task_state`);
+CREATE INDEX `stacktach_lifecycle_dcf9e5f3` ON `stacktach_lifecycle` (`last_raw_id`);
+CREATE INDEX `stacktach_timing_52094d6e` ON `stacktach_timing` (`name`);
+CREATE INDEX `stacktach_timing_9f222e6b` ON `stacktach_timing` (`lifecycle_id`);
+CREATE INDEX `stacktach_timing_efab905a` ON `stacktach_timing` (`start_raw_id`);
+CREATE INDEX `stacktach_timing_c8bb8daf` ON `stacktach_timing` (`end_raw_id`);
+CREATE INDEX `stacktach_timing_4401d15e` ON `stacktach_timing` (`diff`);
+CREATE INDEX `stacktach_requesttracker_792812e8` ON `stacktach_requesttracker` (`request_id`);
+CREATE INDEX `stacktach_requesttracker_9f222e6b` ON `stacktach_requesttracker` (`lifecycle_id`);
+CREATE INDEX `stacktach_requesttracker_ce616a96` ON `stacktach_requesttracker` (`last_timing_id`);
+CREATE INDEX `stacktach_requesttracker_29f4f2ea` ON `stacktach_requesttracker` (`start`);
+CREATE INDEX `stacktach_requesttracker_8eb45f9b` ON `stacktach_requesttracker` (`duration`);
+CREATE INDEX `stacktach_requesttracker_e490d511` ON `stacktach_requesttracker` (`completed`);
+COMMIT;
diff --git a/migrations/003_populate_task_and_image.py b/migrations/003_populate_task_and_image.py
index 01b6619..eb5eb4d 100644
--- a/migrations/003_populate_task_and_image.py
+++ b/migrations/003_populate_task_and_image.py
@@ -39,7 +39,7 @@ def fix_chunk(hours, length):
             states[task] = states.get(task, 0) + 1
             raw.task = task
 
-        image_type_num = image_type.get_numeric_code(payload)
+        raw.image_type = image_type.get_numeric_code(payload, raw.image_type)
         updated += 1
         raw.save()
 
diff --git a/reports/requests.py b/reports/requests.py
index b12ce9c..7c3aced 100644
--- a/reports/requests.py
+++ b/reports/requests.py
@@ -2,6 +2,8 @@ import datetime
 import json
 import sys
 
+import prettytable
+
 sys.path.append("/stacktach")
 
 from stacktach import datetime_to_decimal as dt
@@ -34,6 +36,9 @@ expiry = 60 * 60 # 1 hour
 cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
 
 failures = {}
+causes = {}
+error_messages = {}
+successes = {}
 tenant_issues = {}
 
 for uuid_dict in updates:
@@ -55,10 +60,10 @@ for uuid_dict in updates:
         start = None
         err = None
 
-        operation = None
+        operation = "n/a"
         platform = 0
         tenant = 0
-        dump = False
+        cell = "n/a"
 
         for raw in raws:
             if not start:
@@ -75,20 +80,12 @@ for uuid_dict in updates:
             for cmd in cmds:
                 if cmd in raw.event:
                     operation = cmd
+                    cell = raw.deployment.name
                     break
 
             if raw.image_type > 0:
                 platform = raw.image_type
 
-            if dump:
-                print " %s %s T:%s %s %s %s %s %s"\
-                    % (raw.id, raw.routing_key, raw.tenant,
-                       raw.service, raw.host, raw.deployment.name,
-                       raw.event, dt.dt_from_decimal(raw.when))
-                if raw.event == 'compute.instance.update':
-                    print " State: %s->%s, Task %s->%s" % \
-                        (raw.old_state, raw.state, raw.old_task, raw.task)
-
         if not start:
             continue
 
@@ -98,13 +95,16 @@ for uuid_dict in updates:
         if diff > 3600:
             report = True
 
-        if report:
+        key = (operation, platform, cell)
+        if not report:
+            successes[key] = successes.get(key, 0) + 1
+        else:
             print "------", uuid, "----------"
             print " Req:", req
             print " Duration: %.2f minutes" % (diff / 60)
             print " Operation:", operation
             print " Platform:", image_type.readable(platform)
-            key = (operation, platform)
+            cause = "> %d min" % (expiry / 60)
             failures[key] = failures.get(key, 0) + 1
             tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
 
@@ -117,24 +117,106 @@ for uuid_dict in updates:
                        err.event, dt.dt_from_decimal(err.when))
                 exc = payload.get('exception')
                 if exc:
-                    print exc
+                    # group the messages ...
+                    exc_str = str(exc)
+                    print exc_str
+                    error_messages[exc_str] = error_messages.get(exc_str, 0) + 1
+
+                    # extract the code, if any ...
                     code = exc.get('kwargs', {}).get('code')
                     if code:
                         codes[code] = codes.get(code, 0) + 1
+                        cause = code
+            cause_key = (key, cause)
+            causes[cause_key] = causes.get(cause_key, 0) + 1
 
-print "-- Failures by operation by platform --"
-for failure, count in failures.iteritems():
-    operation, platform = failure
+
+
+def dump_breakdown(totals, label):
+    """Print `label`, then a Category/Count table built from `totals`."""
+    p = prettytable.PrettyTable(["Category", "Count"])
+    for k, v in totals.iteritems():
+        p.add_row([k, v])
+    print label
+    p.sortby = 'Count'
+    print p
+
+
+def dump_summary(info, label):
+    """Print counts per (operation, platform, cell) key; return their sum."""
+    print "-- %s by operation by cell by platform --" % (label,)
+    p = prettytable.PrettyTable(["Operation", "Cell", "Platform", "Count"])
+    total = 0
+    op_totals = {}
+    cell_totals = {}
+    platform_totals = {}
+    for key, count in info.iteritems():
+        operation, platform, cell = key
+        readable = image_type.readable(platform)
+        text = "n/a"
+        if readable:
+            text = ", ".join(readable)
+        op_totals[operation] = op_totals.get(operation, 0) + count
+        cell_totals[cell] = cell_totals.get(cell, 0) + count
+        platform_totals[text] = platform_totals.get(text, 0) + count
+
+        p.add_row([operation, cell, text, count])
+        total += count
+    p.sortby = 'Count'
+    print p
+
+    dump_breakdown(op_totals, "Total %s by Operation" % label)
+    dump_breakdown(cell_totals, "Total %s by Cell" % label)
+    dump_breakdown(platform_totals, "Total %s by Platform" % label)
+
+    print
+
+    return total
+
+
+print
+print "SUMMARY"
+print
+good = dump_summary(successes, "Success")
+bad = dump_summary(failures, "Failures")
+print "====================================================="
+print "Total Success: %d Total Failure: %d" % (good, bad)
+print
+
+print "-- Errors by Tenant --"
+p = prettytable.PrettyTable(["Tenant", "Count"])
+for tenant, count in tenant_issues.iteritems():
+    p.add_row([tenant, count])
+p.sortby = 'Count'
+print p
+
+print
+print "-- Return code counts --"
+p = prettytable.PrettyTable(["Return Code", "Count"])
+for k, v in codes.iteritems():
+    p.add_row([k, v])
+p.sortby = 'Count'
+print p
+
+print
+print "-- Cause breakdown --"
+p = prettytable.PrettyTable(["Cause", "Operation", "Cell", "Platform", "Count"])
+for cause_key, count in causes.iteritems():
+    key, cause = cause_key
+    operation, platform, cell = key
     readable = image_type.readable(platform)
     text = "n/a"
     if readable:
         text = ", ".join(readable)
-    print "%s on %s = %d" % (operation, text, count)
+    p.add_row([cause, operation, cell, text, count])
+p.sortby = 'Count'
+print p
 
-print "-- Errors by Tenant --"
-for tenant, count in tenant_issues.iteritems():
-    print "T %s = %d" % (tenant, count)
+print
+print "-- Error Message Counts --"
+p = prettytable.PrettyTable(["Count", "Message"])
+for k, v in error_messages.iteritems():
+    p.add_row([v, k[:80]])
+p.sortby = 'Count'
+print p
 
-print "-- Return code counts --"
-for k, v in codes.iteritems():
-    print k, v
diff --git a/stacktach/image_type.py b/stacktach/image_type.py
new file mode 100644
index 0000000..33af6ad
--- /dev/null
+++ b/stacktach/image_type.py
@@ -0,0 +1,65 @@
+BASE_IMAGE = 0x1
+SNAPSHOT_IMAGE = 0x2
+LINUX_IMAGE = 0x10
+
+OS_UBUNTU = 0x100
+OS_DEBIAN = 0x200
+OS_CENTOS = 0x400
+OS_RHEL = 0x800
+
+
+def isset(num, flag):
+    """Return True when `num` has at least one bit of `flag` set."""
+    return num & flag > 0
+
+
+flags = {'base' : BASE_IMAGE,
+         'snapshot' : SNAPSHOT_IMAGE,
+         'linux' : LINUX_IMAGE,
+         'ubuntu' : OS_UBUNTU,
+         'debian' : OS_DEBIAN,
+         'centos' : OS_CENTOS,
+         'rhel' : OS_RHEL}
+
+
+def readable(num):
+    """Return the names from `flags` whose bit is set in `num`."""
+    result = []
+    for k, v in flags.iteritems():
+        if isset(num, v):
+            result.append(k)
+    return result
+
+
+def get_numeric_code(payload, default=0):
+    """Return `default` OR-ed with the flags named in payload['image_meta'].
+
+    A `default` of None is treated as 0. Missing or unrecognised
+    image_type / os_type / os_distro values contribute no bits.
+    """
+    meta = payload.get('image_meta', {})
+    if default is None:
+        default = 0
+    num = default
+
+    image_type = meta.get('image_type', '')
+    if image_type == 'base':
+        num |= BASE_IMAGE
+    if image_type == 'snapshot':
+        num |= SNAPSHOT_IMAGE
+
+    os_type = meta.get('os_type', '')
+    if os_type == 'linux':
+        num |= LINUX_IMAGE
+
+    os_distro = meta.get('os_distro', '')
+    if os_distro == 'ubuntu':
+        num |= OS_UBUNTU
+    if os_distro == 'debian':
+        num |= OS_DEBIAN
+    if os_distro == 'centos':
+        num |= OS_CENTOS
+    if os_distro == 'rhel':
+        num |= OS_RHEL
+
+    return num