stacktach/reports/nova_usage_audit.py
Sandy Walsh 8a0f06ac79 Freshen up with latest from RackerLabs (and include tox.ini)
Added instance hours report

Initial version of report to calculate unit hours used
for nova instances

Breakdown by flavor, flavor class, account/billing types and by tenant.

Moved license so script has shebang as the first line
Add tenant info cache.
Refactor Instance hr report.
Added cache table for basic tenant info for reports.
Refactor instance_hours report to use table.
Improve performance of tenant info update.

Use bulk SQL operations to speed up the tenant info update,
as it's taking ~40s/1000 tenants to update on a decent machine.

Fix unit tests broken by rebase. Also, renumber a migration
due to a collision.

Add Apache license header to new files.

Fixed bug with fetching deployment information in
reconciler. Reverted old method for fetching
current usage's deployment and added new method to
fetch latest deployment information for
a request_id.

Made the field mismatch error message more readable
Refactored nova and glance verifier tests

The exists records are updated with a 201 send_status as part of the StackTach down repair mechanism.

Revert "Fixed bug with fetching deployment information in"

Revert "Adding host and deployment info to missing exists entries in the nova usage audit"

Revert "Added column headers for host and deployment in json reports"

Only log ERROR on last retry

Fixed the wrong status name for the sent_failed variable in the audit report.

Fix documentation for URLs that are not available for glance.

Deprecate stacky URLs (usage, deletes, exists) that are no
longer used.

Revert "Revert "Added column headers for host and deployment in json reports""

Revert "Revert "Adding host and deployment info to missing exists entries in the nova usage audit""

Revert "Revert "Fixed bug with fetching deployment information in""

Cell and compute info added for verification failures as well.
If that info is not present (request_id is not populated for an
InstanceUsage entry), the cells display '-'.

Add tox support for move to stackforge

Change-Id: Id94c2a7f1f9061e972e90c3f54e39c9dec11943b
2014-05-08 15:58:03 -03:00

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
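
# Nova usage audit: checks instance launch records (new launches in the
# audit period, older launches still running, and reconciled usage) against
# the .exists records received for that period, runs the verifier audit,
# and emits the results as a JSON report that can be printed or stored.
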
import argparse
import datetime
import functools
import json
import sys
import os

sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach'))

import usage_audit

from stacktach.models import InstanceUsage
from stacktach import datetime_to_decimal as dt
from stacktach import models
from stacktach.reconciler import Reconciler
from stacktach import stacklog
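
# The "old launches" lookups below use raw SQL: they select usage (and
# reconcile) rows launched before the audit period whose instances were
# never deleted, or were deleted after the period began, since those
# instances are still expected to produce exists records.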
OLD_LAUNCHES_QUERY = """
select stacktach_instanceusage.id,
       stacktach_instanceusage.instance,
       stacktach_instanceusage.launched_at from stacktach_instanceusage
    left outer join stacktach_instancedeletes on
        stacktach_instanceusage.instance = stacktach_instancedeletes.instance
    left outer join stacktach_instancereconcile on
        stacktach_instanceusage.instance = stacktach_instancereconcile.instance
    where (
        stacktach_instancereconcile.deleted_at is null and (
            stacktach_instancedeletes.deleted_at is null or
            stacktach_instancedeletes.deleted_at > %s
        )
        or (stacktach_instancereconcile.deleted_at is not null and
            stacktach_instancereconcile.deleted_at > %s)
    ) and stacktach_instanceusage.launched_at < %s;"""

OLD_RECONCILES_QUERY = """
select stacktach_instancereconcile.id,
       stacktach_instancereconcile.instance,
       stacktach_instancereconcile.launched_at from stacktach_instancereconcile
    left outer join stacktach_instancedeletes on
        stacktach_instancereconcile.instance = stacktach_instancedeletes.instance
    where (
        stacktach_instancereconcile.deleted_at is null and (
            stacktach_instancedeletes.deleted_at is null or
            stacktach_instancedeletes.deleted_at > %s
        )
        or (stacktach_instancereconcile.deleted_at is not null and
            stacktach_instancereconcile.deleted_at > %s)
    ) and stacktach_instancereconcile.launched_at < %s;"""

DEFAULT_UMS_OFFSET = 4 * 60 * 60 # 4 Hours
reconciler = None
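# 'reconciler' is replaced in __main__ when --reconcile is passed; when set,
# missing exists rows are flagged as reconciled ('Y') or not ('N') in the
# failure details.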


def _get_new_launches(beginning, ending):
    filters = {
        'launched_at__gte': beginning,
        'launched_at__lte': ending,
    }
    return models.InstanceUsage.objects.filter(**filters)


def _get_deletes(beginning, ending):
    filters = {
        'deleted_at__gte': beginning,
        'deleted_at__lte': ending,
    }
    return models.InstanceDeletes.objects.filter(**filters)


def _get_exists(beginning, ending):
    filters = {
        'audit_period_beginning': beginning,
        'audit_period_ending__gte': beginning,
        'audit_period_ending__lte': ending,
    }
    return models.InstanceExists.objects.filter(**filters)


def cell_and_compute(instance, launched_at):
    usage = InstanceUsage.find(instance, launched_at)[0]
    deployment = usage.latest_deployment_for_request_id()
    cell = (deployment and deployment.name) or '-'
    compute = usage.host() or '-'
    return cell, compute
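

# For each instance, look for an exists record whose launched_at matches the
# expected launch to the second; when none is found, record a failure row
# (marked reconciled or not, with cell and compute info when available).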
def _audit_launches_to_exists(launches, exists, beginning):
    fails = []
    for (instance, launches) in launches.items():
        if instance in exists:
            for expected in launches:
                found = False
                for actual in exists[instance]:
                    if int(expected['launched_at']) == \
                            int(actual['launched_at']):
                        # HACK (apmelton): Truncate the decimal because we
                        # may not have the milliseconds.
                        found = True
                if not found:
                    rec = False
                    if reconciler:
                        args = (expected['id'], beginning)
                        rec = reconciler.missing_exists_for_instance(*args)
                    launched_at = dt.dt_from_decimal(expected['launched_at'])
                    msg = "Couldn't find exists for launch (%s, %s)"
                    msg = msg % (instance, launched_at)
                    cell, compute = cell_and_compute(instance, launched_at)
                    fails.append(['Launch', expected['id'], msg,
                                  'Y' if rec else 'N', cell, compute])
        else:
            rec = False
            if reconciler:
                args = (launches[0]['id'], beginning)
                rec = reconciler.missing_exists_for_instance(*args)
            msg = "No exists for instance (%s)" % instance
            launched_at = dt.dt_from_decimal(launches[0]['launched_at'])
            cell, compute = cell_and_compute(instance, launched_at)
            fails.append(['-', msg, 'Y' if rec else 'N',
                          cell, compute])
    return fails
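

# Collect launches for the period: new launches keyed by instance, plus the
# most recent pre-period launch or reconcile per instance, then audit them
# against the exists records for the same period.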
def _launch_audit_for_period(beginning, ending):
    launches_dict = {}
    new_launches = _get_new_launches(beginning, ending)
    for launch in new_launches:
        instance = launch.instance
        l = {'id': launch.id, 'launched_at': launch.launched_at}
        if instance in launches_dict:
            launches_dict[instance].append(l)
        else:
            launches_dict[instance] = [l, ]

    # NOTE (apmelton)
    # Django's safe substitution doesn't allow dict substitution...
    # Thus, we send it 'beginning' three times...
    old_launches = models.InstanceUsage.objects\
        .raw(OLD_LAUNCHES_QUERY,
             [beginning, beginning, beginning])

    old_launches_dict = {}
    for launch in old_launches:
        instance = launch.instance
        l = {'id': launch.id, 'launched_at': launch.launched_at}
        if instance not in old_launches_dict or \
                (old_launches_dict[instance]['launched_at'] <
                 launch.launched_at):
            old_launches_dict[instance] = l

    # NOTE (apmelton)
    # Django's safe substitution doesn't allow dict substitution...
    # Thus, we send it 'beginning' three times...
    old_recs = models.InstanceReconcile.objects\
        .raw(OLD_RECONCILES_QUERY,
             [beginning, beginning, beginning])

    for rec in old_recs:
        instance = rec.instance
        l = {'id': rec.id, 'launched_at': rec.launched_at}
        if instance not in old_launches_dict or \
                (old_launches_dict[instance]['launched_at'] <
                 rec.launched_at):
            old_launches_dict[instance] = l

    for instance, launch in old_launches_dict.items():
        if instance in launches_dict:
            launches_dict[instance].append(launch)
        else:
            launches_dict[instance] = [launch, ]

    exists_dict = {}
    exists = _get_exists(beginning, ending)
    for exist in exists:
        instance = exist.instance
        e = {'id': exist.id,
             'launched_at': exist.launched_at,
             'deleted_at': exist.deleted_at}
        if instance in exists_dict:
            exists_dict[instance].append(e)
        else:
            exists_dict[instance] = [e, ]

    launch_to_exists_fails = _audit_launches_to_exists(launches_dict,
                                                       exists_dict,
                                                       beginning)

    return launch_to_exists_fails, new_launches.count(), len(old_launches_dict)


def audit_for_period(beginning, ending, ums=False, ums_offset=0):
    beginning_decimal = dt.dt_to_decimal(beginning)
    ending_decimal = dt.dt_to_decimal(ending)

    if ums:
        verifier_audit_func = functools.partial(
            usage_audit._verifier_audit_for_day_ums, ums_offset=ums_offset
        )
    else:
        verifier_audit_func = usage_audit._verifier_audit_for_day

    (verify_summary,
     verify_detail) = verifier_audit_func(beginning_decimal, ending_decimal,
                                          models.InstanceExists)
    detail, new_count, old_count = _launch_audit_for_period(beginning_decimal,
                                                            ending_decimal)

    summary = {
        'verifier': verify_summary,
        'launch_summary': {
            'new_launches': new_count,
            'old_launches': old_count,
            'failures': len(detail)
        },
    }

    details = {
        'exist_fails': verify_detail,
        'launch_fails': detail,
    }

    return summary, details


def store_results(start, end, summary, details):
    values = {
        'json': make_json_report(summary, details),
        'created': dt.dt_to_decimal(datetime.datetime.utcnow()),
        'period_start': start,
        'period_end': end,
        'version': 7,
        'name': 'nova usage audit'
    }
    report = models.JsonReport(**values)
    report.save()
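

# A rough sketch of the JSON report produced below (values are illustrative,
# not real output):
#
#   {"summary": {"verifier": {...},
#                "launch_summary": {"new_launches": 10,
#                                   "old_launches": 2,
#                                   "failures": 0}},
#    "exist_fail_headers": ["Exists Row ID", ...],
#    "exist_fails": [...],
#    "launch_fail_headers": ["Launch Row ID", ...],
#    "launch_fails": [...]}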
def make_json_report(summary, details):
    report = {
        'summary': summary,
        'exist_fail_headers': ['Exists Row ID', 'Error Description', 'Cell',
                               'Compute'],
        'exist_fails': details['exist_fails'],
        'launch_fail_headers': ['Launch Row ID', 'Error Description',
                                'Reconciled?', 'Cell', 'Compute'],
        'launch_fails': details['launch_fails']
    }
    return json.dumps(report)


def valid_datetime(d):
    try:
        t = datetime.datetime.strptime(d, "%Y-%m-%d %H:%M:%S")
        return t
    except Exception, e:
        raise argparse.ArgumentTypeError(
            "'%s' is not in YYYY-MM-DD HH:MM:SS format." % d)


if __name__ == '__main__':
    parser = argparse.ArgumentParser('StackTach Nova Usage Audit Report')
    parser.add_argument('--period_length',
                        choices=['hour', 'day'], default='day')
    parser.add_argument('--utcdatetime',
                        help="Override the end time used to generate report.",
                        type=valid_datetime, default=None)
    parser.add_argument('--store',
                        help="If set to true, report will be stored. "
                             "Otherwise, it will just be printed",
                        type=bool, default=False)
    parser.add_argument('--reconcile',
                        help="Enable reconciliation",
                        type=bool, default=False)
    parser.add_argument('--reconciler_config',
                        help="Location of the reconciler config file",
                        type=str,
                        default='/etc/stacktach/reconciler-config.json')
    parser.add_argument('--ums',
                        help="Use query to match UMS, "
                             "period length of 'day' required.",
                        action='store_true')
    parser.add_argument('--ums-offset',
                        help="UMS' fencepost offset in seconds. "
                             "Default: 4 hours",
                        type=int,
                        default=DEFAULT_UMS_OFFSET)
    args = parser.parse_args()

    if args.ums and args.period_length != 'day':
        print "UMS query can only be used with period_length of 'day'."
        sys.exit(0)

    stacklog.set_default_logger_name('nova_usage_audit')
    parent_logger = stacklog.get_logger('nova_usage_audit', is_parent=True)
    log_listener = stacklog.LogListener(parent_logger)
    log_listener.start()

    if args.reconcile:
        with open(args.reconciler_config) as f:
            reconciler_config = json.load(f)
            reconciler = Reconciler(reconciler_config)

    if args.utcdatetime is not None:
        time = args.utcdatetime
    else:
        time = datetime.datetime.utcnow()

    start, end = usage_audit.get_previous_period(time, args.period_length)
    summary, details = audit_for_period(start, end, ums=args.ums,
                                        ums_offset=args.ums_offset)

    if not args.store:
        print make_json_report(summary, details)
    else:
        store_results(start, end, summary, details)
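
# Example invocation (illustrative; assumes STACKTACH_INSTALL_DIR points at a
# configured StackTach install):
#
#   python stacktach/reports/nova_usage_audit.py --period_length day --store True
#
# Note: --store and --reconcile use argparse's type=bool, so any non-empty
# value (even "False") is treated as true; omit the flag to keep the default
# of False.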