Started refactoring for better command-line support

This commit is contained in:
Sandy Walsh 2013-02-15 15:05:43 -04:00
parent add50385a7
commit ef92c33f74
3 changed files with 180 additions and 122 deletions

View File

@@ -1,3 +1,4 @@
import argparse
import datetime
import json
import sys
@@ -6,161 +7,207 @@ import time
import prettytable
sys.path.append("/stacktach")
sys.path.append(".")
from stacktach import datetime_to_decimal as dt
from stacktach import image_type
from stacktach import models
# NOTE(review): OLD-version guard (pre-refactor): abort unless run as a
# script. The NEW version replaces this with the standard
# `if __name__ == '__main__':` entry point further down — only one of the
# two should survive the refactor.
if __name__ != '__main__':
sys.exit(1)
# NOTE(review): this whole region is a rendered commit diff with the +/-
# markers and indentation stripped, so OLD (pre-refactor, module-level
# script) and NEW (make_report function) lines are interleaved — note the
# duplicated `yesterday = ...`, `start = ...`, `updates = ...`, `reqs = ...`
# assignments and the two `for raw in raws:` loops. It is not runnable
# as-is; one version must be reconstructed before editing.
def make_report(yesterday=None, start_hour=0, hours=24, percentile=90, store=False):
# NOTE(review): `store` is accepted but never used in the visible body —
# presumably for the JsonReport persistence added in models.py; confirm.
if not yesterday:
yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1)
# --- OLD command-line handling (superseded by argparse in __main__) ---
yesterday = datetime.datetime.utcnow().date() - datetime.timedelta(days=1)
if len(sys.argv) == 2:
try:
t = time.strptime(sys.argv[1], "%Y-%m-%d")
yesterday = datetime.datetime(*t[:6])
# BUG(review): Python-2-only `except` syntax; also catches everything.
except Exception, e:
print e
print "Usage: python requests.py YYYY-MM-DD (the end date)"
sys.exit(1)
# NEW: report window derived from the function arguments.
start = datetime.datetime(year=yesterday.year, month=yesterday.month,
day=yesterday.day, hour=start_hour)
end = start + datetime.timedelta(hours=hours-1, minutes=59, seconds=59)
# OLD residue: would clobber the NEW parameters if both versions were kept.
percentile = 90
hours = 24
dstart = dt.dt_to_decimal(start)
dend = dt.dt_to_decimal(end)
# OLD window computation (no start_hour support).
start = datetime.datetime(year=yesterday.year, month=yesterday.month,
day=yesterday.day)
end = start + datetime.timedelta(hours=hours-1, minutes=59, seconds=59)
# NOTE(review): `codes` is assigned but never read in the visible code.
codes = {}
print "Generating report for %s to %s" % (start, end)
# Get all the instances that have changed in the last N hours ...
updates = models.RawData.objects.filter(event='compute.instance.update',
when__gt=dstart, when__lte=dend)\
.values('instance').distinct()
dstart = dt.dt_to_decimal(start)
dend = dt.dt_to_decimal(end)
# NOTE(review): `expiry` is never used in the visible code.
expiry = 60 * 60 # 1 hour
cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
codes = {}
failures = {}
durations = {}
attempts = {}
# Get all the instances that have changed in the last N hours ...
updates = models.RawData.objects.filter(event='compute.instance.update',
when__gt=dstart, when__lte=dend)\
.values('instance').distinct()
for uuid_dict in updates:
uuid = uuid_dict['instance']
expiry = 60 * 60 # 1 hour
cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
failures = {}
durations = {}
attempts = {}
# (duplicate of the loop header above — OLD/NEW interleave)
for uuid_dict in updates:
uuid = uuid_dict['instance']
# All the unique Request ID's for this instance during that timespan.
reqs = models.RawData.objects.filter(instance=uuid,
when__gt=dstart, when__lte=dend) \
.values('request_id').distinct()
# All the unique Request ID's for this instance during that timespan.
reqs = models.RawData.objects.filter(instance=uuid,
when__gt=dstart, when__lte=dend) \
.values('request_id').distinct()
for req_dict in reqs:
report = False
req = req_dict['request_id']
raws = models.RawData.objects.filter(request_id=req)\
.exclude(event='compute.instance.exists')\
.order_by('when')
for req_dict in reqs:
report = False
req = req_dict['request_id']
raws = models.RawData.objects.filter(request_id=req)\
.exclude(event='compute.instance.exists')\
.order_by('when')
# NOTE(review): `start`/`end` are reused here as per-request timestamps,
# clobbering the report window computed above.
start = None
err = None
start = None
err = None
operation = "aux"
image_type_num = 0
operation = "aux"
image_type_num = 0
# NEW per-raw loop: collects error flag, operation name and image bits.
for raw in raws:
if not start:
start = raw.when
if 'error' in raw.routing_key:
err = raw
report = True
for cmd in cmds:
if cmd in raw.event:
operation = cmd
break
if raw.image_type:
image_type_num |= raw.image_type
image = "?"
if image_type.isset(image_type_num, image_type.BASE_IMAGE):
image = "base"
if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE):
image = "snap"
# OLD per-raw loop (duplicate; tracked durations inline instead).
for raw in raws:
if not start:
start = raw.when
if 'error' in raw.routing_key:
err = raw
continue
end = raw.when
diff = end - start
# Requests longer than an hour are flagged for the failure count.
if diff > 3600:
report = True
for cmd in cmds:
if cmd in raw.event:
operation = cmd
break
key = (operation, image)
if raw.image_type:
image_type_num |= raw.image_type
# Track durations for all attempts, good and bad ...
_durations = durations.get(key, [])
_durations.append(diff)
durations[key] = _durations
image = "?"
if image_type.isset(image_type_num, image_type.BASE_IMAGE):
image = "base"
if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE):
image = "snap"
attempts[key] = attempts.get(key, 0) + 1
# NEW: skip requests that produced no raw events.
if not start:
continue
if report:
failures[key] = failures.get(key, 0) + 1
# NEW: duration = first event to last event of the request.
end = raw.when
diff = end - start
# Summarize the results ...
# NEW return shape: report[0] = details dict, report[1] = column headers,
# report[2:] = data rows (consumed by __main__ below).
report = []
pct = (float(100 - percentile) / 2.0) / 100.0
# NOTE(review): `start`/`end` here are the LAST request's timestamps (see
# clobbering note above), not the report window — confirm intended.
details = {'percentile': percentile, 'pct': pct, 'hours': hours,
'start': start, 'end': end}
report.append(details)
if diff > 3600:
report = True
cols = ["Operation", "Image", "Min*", "Max*", "Avg*",
"Requests", "# Fail", "Fail %"]
report.append(cols)
key = (operation, image)
total = 0
failure_total = 0
# NOTE(review): dict.iteritems() is Python-2 only.
for key, count in attempts.iteritems():
total += count
operation, image = key
# Track durations for all attempts, good and bad ...
_durations = durations.get(key, [])
_durations.append(diff)
durations[key] = _durations
failure_count = failures.get(key, 0)
failure_total += failure_count
failure_percentage = float(failure_count) / float(count)
attempts[key] = attempts.get(key, 0) + 1
# N-th % of durations ...
_values = durations[key]
_values.sort()
_outliers = int(float(len(_values)) * pct)
if _outliers > 0:
before = len(_values)
# Trim the same number of outliers from both ends of the sorted list.
_values = _values[_outliers:-_outliers]
_min = 99999999
_max = 0
_total = 0.0
for value in _values:
_min = min(_min, value)
_max = max(_max, value)
_total += float(value)
# NOTE(review): raises ZeroDivisionError if trimming emptied _values.
_avg = float(_total) / float(len(_values))
_fmin = dt.sec_to_str(_min)
_fmax = dt.sec_to_str(_max)
_favg = dt.sec_to_str(_avg)
if report:
failures[key] = failures.get(key, 0) + 1
# BUG(review): in the NEW version `report` is a plain list (built with
# report.append above) — lists have no add_row(); should be
# report.append([...]).
report.add_row([operation, image, _fmin, _fmax, _favg, count,
failure_count, failure_percentage])
# Print the results ...
cols = ["Operation", "Image", "Min*", "Max*", "Avg*",
"Requests", "# Fail", "Fail %"]
p = prettytable.PrettyTable(cols)
for c in cols[2:]:
p.align[c] = 'r'
p.sortby = cols[0]
details['total'] = total
# BUG(review): __main__ later reads details['failure_total'], but this
# stores the failures DICT under key 'failures' — key mismatch (KeyError
# downstream); confirm the intended key/value.
details['failures'] = failures
details['failure_rate'] = (float(failure_total)/float(total)) * 100.0
return report
# --- remaining OLD module-level summary/printing residue below ---
pct = (float(100 - percentile) / 2.0) / 100.0
print "* Using %d-th percentile for results (+/-%.1f%% cut)" % \
(percentile, pct * 100.0)
total = 0
failure_total = 0
for key, count in attempts.iteritems():
total += count
operation, image = key
failure_count = failures.get(key, 0)
failure_total += failure_count
failure_percentage = float(failure_count) / float(count)
_failure_percentage = "%.1f%%" % (failure_percentage * 100.0)
def valid_date(date):
    """argparse type-checker: parse a YYYY-MM-DD string to a datetime.

    Returns a datetime.datetime at midnight of the given day.
    Raises argparse.ArgumentTypeError on malformed input so argparse
    reports a clean usage error instead of a traceback.
    """
    try:
        t = time.strptime(date, "%Y-%m-%d")
        return datetime.datetime(*t[:6])
    except ValueError:
        # time.strptime raises ValueError for bad input. The original
        # `except Exception, e` was Python-2-only syntax, over-broad, and
        # never used the bound `e`.
        raise argparse.ArgumentTypeError(
            "'%s' is not in YYYY-MM-DD format." % date)
# N-th % of durations ...
_values = durations[key]
_values.sort()
_outliers = int(float(len(_values)) * pct)
if _outliers > 0:
before = len(_values)
_values = _values[_outliers:-_outliers]
print "culling %d -> %d" % (before, len(_values))
_min = 99999999
_max = 0
_total = 0.0
for value in _values:
_min = min(_min, value)
_max = max(_max, value)
_total += float(value)
_avg = float(_total) / float(len(_values))
_fmin = dt.sec_to_str(_min)
_fmax = dt.sec_to_str(_max)
_favg = dt.sec_to_str(_avg)
p.add_row([operation, image, _fmin, _fmax, _favg, count,
failure_count, _failure_percentage])
print p
# NOTE(review): like the report body above, this section is an interleaved
# OLD/NEW diff; the stray OLD lines are called out below.
if __name__ == '__main__':
parser = argparse.ArgumentParser('StackTach Nova Usage Summary Report')
parser.add_argument('--utcdate', help='Report start date YYYY-MM-DD. Default yesterday midnight.',
type=valid_date, default=None)
parser.add_argument('--hours', help='Report span in hours. Default: 24', default=24, type=int)
parser.add_argument('--start_hour', help='Starting hour 0-23. Default: 0', default=0, type=int)
parser.add_argument('--percentile', help='Percentile for timings. Default: 90', default=90, type=int)
parser.add_argument('--store', help='Store report in database. Default: False', default=False,
action="store_true")
parser.add_argument('--silent', help="Do not show summary report. Default: False", default=False,
action="store_true")
args = parser.parse_args()
# OLD residue: summary print from the pre-refactor script (`total` /
# `failure_total` are not defined at this point in the NEW flow).
print "Total: %d, Failures: %d, Failure Rate: %.1f%%" % \
(total, failure_total,
(float(failure_total)/float(total)) * 100.0)
yesterday = args.utcdate
percentile = args.percentile
hours = args.hours
start_hour = args.start_hour
# BUG(review): debug leftovers — prints the parsed args and exits with
# status 1, so nothing below this line ever runs.
print args
sys.exit(1)
# BUG(review): args is an argparse.Namespace; args['store'] raises
# TypeError — should be args.store.
raw_report = make_report(yesterday, start_hour, hours, percentile, args['store'])
# BUG(review): no --show option is defined; the parser defines --silent,
# so this presumably should test args.silent (and exiting with status 1
# on the silent success path is also questionable).
if not args.show:
sys.exit(1)
# report[0] is the details dict produced by make_report.
details = raw_report[0]
percentile = details['percentile']
pct = details['pct']
start = details['start']
end = details['end']
print "Report for %s to %s" % (start, end)
# report[1] is the column-header list.
cols = raw_report[1]
# Print the results ...
p = prettytable.PrettyTable(cols)
for c in cols[2:]:
p.align[c] = 'r'
p.sortby = cols[0]
print "* Using %d-th percentile for results (+/-%.1f%% cut)" % \
(percentile, pct * 100.0)
for row in raw_report[2:]:
p.add_row(row)
print p
total = details['total']
# BUG(review): make_report stores 'failures' (a dict) and 'failure_rate',
# never 'failure_total' — this lookup raises KeyError.
failure_total = details['failure_total']
print "Total: %d, Failures: %d, Failure Rate: %.1f%%" % \
(total, failure_total,
(float(failure_total)/float(total)) * 100.0)

View File

@@ -19,7 +19,7 @@ except ImportError:
db_password = os.environ['STACKTACH_DB_PASSWORD']
install_dir = os.environ['STACKTACH_INSTALL_DIR']
# NOTE(review): interleaved diff — the next line is the OLD value, the one
# after it the NEW value. This commit flips DEBUG from False to True.
DEBUG = False
# SECURITY(review): Django DEBUG = True exposes stack traces and settings
# on error pages; must not reach production — confirm this is intentional
# (commit message suggests a work-in-progress refactor).
DEBUG = True
TEMPLATE_DEBUG = DEBUG
ADMINS = (

View File

@@ -154,3 +154,14 @@ class RequestTracker(models.Model):
# Not used ... but soon hopefully.
completed = models.BooleanField(default=False, db_index=True)
class JsonReport(models.Model):
    """Stores cron-job reports in raw json format for extraction
    via stacky/rest. All DateTimes are UTC."""
    # Start and end of the reporting window this stored report covers.
    period_start = models.DateTimeField(db_index=True)
    period_end = models.DateTimeField(db_index=True)
    # Presumably when the report row was generated — confirm in the writer.
    created = models.DateTimeField(db_index=True)
    # Report identifier (e.g. the report/cron-job name), max 50 chars.
    name = models.CharField(max_length=50, db_index=True)
    # Schema version of the stored payload; defaults to 1.
    version = models.IntegerField(default=1)
    # Raw JSON payload of the report.
    json = models.TextField()