From 59ee803c81dce78efff86b790a139d40a720e4e6 Mon Sep 17 00:00:00 2001
From: Manali Latkar <manalil@thoughtworks.com>
Date: Tue, 10 Sep 2013 14:51:04 +0530
Subject: [PATCH 1/4] glance_usage_audit.py gives the summary of all exists
 within the audit period along with details of failed exists. Some refactoring
 of nova_usage_audit is also included.

---
 reports/__init__.py           |   0
 reports/glance_usage_audit.py | 220 +++++++++++++++++++++++++++++
 reports/nova_usage_audit.py   | 150 ++------------------
 reports/usage_audit.py        | 107 +++++++++++++++
 tests/unit/test_auditor.py    | 252 ++++++++++++++++++++++++++++++++++
 5 files changed, 594 insertions(+), 135 deletions(-)
 create mode 100644 reports/__init__.py
 create mode 100644 reports/glance_usage_audit.py
 create mode 100644 reports/usage_audit.py
 create mode 100644 tests/unit/test_auditor.py

diff --git a/reports/__init__.py b/reports/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/reports/glance_usage_audit.py b/reports/glance_usage_audit.py
new file mode 100644
index 0000000..a84235b
--- /dev/null
+++ b/reports/glance_usage_audit.py
@@ -0,0 +1,220 @@
+import argparse
+import datetime
+import json
+import os
+import sys
+
+sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach'))
+from django.db.models import F
+from reports import usage_audit
+from stacktach import models
+from stacktach import datetime_to_decimal as dt
+
+
+OLD_LAUNCHES_QUERY = """
+select * from stacktach_imageusage where
+    created_at is not null and
+    created_at < %s and
+    uuid not in
+        (select distinct(uuid)
+            from stacktach_imagedeletes where
+                deleted_at < %s);"""
+
+
+def audit_usages_to_exists(exists, usages):
+    # checks if all exists correspond to the given usages
+    fails = []
+    for (uuid, launches) in usages.items():
+        if uuid not in exists:
+            msg = "No exists for usage (%s)" % uuid
+            fails.append(['Usage', launches[0]['id'], msg])
+    return fails
+
+def _get_new_launches(beginning, ending):
+    filters = {
+        'created_at__gte': beginning,
+        'created_at__lte': ending,
+    }
+    return models.ImageUsage.objects.filter(**filters)
+
+def _get_exists(beginning, ending):
+    filters = {
+        'audit_period_beginning': beginning,
+        'audit_period_ending__gte': beginning,
+        'audit_period_ending__lte': ending,
+    }
+    return models.ImageExists.objects.filter(**filters)
+
+def valid_datetime(d):
+    try:
+        t = datetime.datetime.strptime(d, "%Y-%m-%d %H:%M:%S")
+        return t
+    except Exception, e:
+        raise argparse.ArgumentTypeError(
+            "'%s' is not in YYYY-MM-DD HH:MM:SS format." % d)
+
+
+def audit_for_period(beginning, ending):
+    beginning_decimal = dt.dt_to_decimal(beginning)
+    ending_decimal = dt.dt_to_decimal(ending)
+
+    (verify_summary,
+     verify_detail) = _verifier_audit_for_day(beginning_decimal,
+                                                          ending_decimal,
+                                                          models.ImageExists)
+    detail, new_count, old_count = _launch_audit_for_period(beginning_decimal,
+                                                            ending_decimal)
+
+    summary = {
+        'verifier': verify_summary,
+        'launch_summary': {
+            'new_launches': new_count,
+            'old_launches': old_count,
+            'failures': len(detail)
+        },
+    }
+
+    details = {
+        'exist_fails': verify_detail,
+        'launch_fails': detail,
+    }
+
+    return summary, details
+
+def _verifier_audit_for_day(beginning, ending, exists_model):
+    summary = {}
+    period = 60*60*24-0.000001
+    if args.period_length == 'hour':
+        period = 60*60-0.000001
+    filters = {
+        'raw__when__gte': beginning,
+        'raw__when__lte': ending,
+        'audit_period_ending': F('audit_period_beginning') + period
+
+    }
+    instant_exists = exists_model.objects.filter(**filters)
+    summary['exists'] = _audit_for_exists(instant_exists)
+
+    filters = {
+        'raw__when__gte': beginning,
+        'raw__when__lte': ending,
+        'status': exists_model.FAILED
+    }
+    failed = exists_model.objects.filter(**filters)
+    detail = []
+    for exist in failed:
+        detail.append(['Exist', exist.id, exist.fail_reason])
+    return summary, detail
+
+def _audit_for_exists(exists_query):
+    (verified, reconciled,
+     fail, pending, verifying) = usage_audit._status_queries(exists_query)
+
+    (success, unsent, redirect,
+     client_error, server_error) = usage_audit._send_status_queries(verified)
+
+    report = {
+        'count': exists_query.count(),
+        'verified': verified.count(),
+        'failed': fail.count(),
+        'pending': pending.count(),
+        'verifying': verifying.count(),
+        'send_status': {
+            'success': success.count(),
+            'unsent': unsent.count(),
+            'redirect': redirect.count(),
+            'client_error': client_error.count(),
+            'server_error': server_error.count(),
+        }
+    }
+    return report
+
+def _launch_audit_for_period(beginning, ending):
+    launches_dict = {}
+    new_launches = _get_new_launches(beginning, ending)
+    for launch in new_launches:
+        uuid = launch.uuid
+        l = {'id': launch.id, 'created_at': launch.created_at}
+        if uuid in launches_dict:
+            launches_dict[uuid].append(l)
+        else:
+            launches_dict[uuid] = [l, ]
+
+    # NOTE (apmelton)
+    # Django's safe substitution doesn't allow dict substitution...
+    # Thus, we send it 'beginning' three    times...
+    old_launches = models.ImageUsage.objects\
+                         .raw(OLD_LAUNCHES_QUERY,
+                              [beginning, beginning])
+
+    old_launches_dict = {}
+    for launch in old_launches:
+        uuid = launch.uuid
+        l = {'id': launch.id, 'created_at': launch.created_at}
+        old_launches_dict[uuid] = l
+
+    exists_dict = {}
+    exists = _get_exists(beginning, ending)
+    for exist in exists:
+        uuid = exist.uuid
+        e = {'id': exist.id,
+             'created_at': exist.created_at,
+             'deleted_at': exist.deleted_at}
+        if uuid in exists_dict:
+            exists_dict[uuid].append(e)
+        else:
+            exists_dict[uuid] = [e, ]
+
+    launch_to_exists_fails = audit_usages_to_exists(exists_dict,launches_dict)
+    return launch_to_exists_fails, new_launches.count(), len(old_launches_dict)
+
+
+def store_results(start, end, summary, details):
+    values = {
+        'json': make_json_report(summary, details),
+        'created': dt.dt_to_decimal(datetime.datetime.utcnow()),
+        'period_start': start,
+        'period_end': end,
+        'version': 4,
+        'name': 'glance usage audit'
+    }
+
+    report = models.JsonReport(**values)
+    report.save()
+
+
+def make_json_report(summary, details):
+    report = [{'summary': summary},
+              ['Object', 'ID', 'Error Description']]
+    report.extend(details['exist_fails'])
+    report.extend(details['launch_fails'])
+    return json.dumps(report)
+
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser('StackTach Nova Usage Audit Report')
+    parser.add_argument('--period_length',
+                        choices=['hour', 'day'], default='day')
+    parser.add_argument('--utcdatetime',
+                        help="Override the end time used to generate report.",
+                        type=valid_datetime, default=None)
+    parser.add_argument('--store',
+                        help="If set to true, report will be stored. "
+                             "Otherwise, it will just be printed",
+                        type=bool, default=False)
+    args = parser.parse_args()
+
+    if args.utcdatetime is not None:
+        time = args.utcdatetime
+    else:
+        time = datetime.datetime.utcnow()
+
+    start, end = usage_audit.get_previous_period(time, args.period_length)
+
+    summary, details = audit_for_period(start, end)
+
+    if not args.store:
+        print make_json_report(summary, details)
+    else:
+        store_results(start, end, summary, details)
diff --git a/reports/nova_usage_audit.py b/reports/nova_usage_audit.py
index fef2d01..a4862d8 100644
--- a/reports/nova_usage_audit.py
+++ b/reports/nova_usage_audit.py
@@ -26,28 +26,23 @@ import os
 
 sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach'))
 
-from django.db.models import F
+import usage_audit
 
 from stacktach import datetime_to_decimal as dt
 from stacktach import models
 from stacktach.reconciler import Reconciler
 
 OLD_LAUNCHES_QUERY = """
-select stacktach_instanceusage.id,
-       stacktach_instanceusage.instance,
-       stacktach_instanceusage.launched_at from stacktach_instanceusage
-    left outer join stacktach_instancedeletes on
-        stacktach_instanceusage.instance = stacktach_instancedeletes.instance
-    left outer join stacktach_instancereconcile on
-        stacktach_instanceusage.instance = stacktach_instancereconcile.instance
-        where (
-            stacktach_instancereconcile.deleted_at is null and (
-                stacktach_instancedeletes.deleted_at is null or
-                stacktach_instancedeletes.deleted_at > %s
-            )
-            or (stacktach_instancereconcile.deleted_at is not null and
-                stacktach_instancereconcile.deleted_at > %s)
-        ) and stacktach_instanceusage.launched_at < %s;"""
+select * from stacktach_instanceusage where
+    launched_at is not null and
+    launched_at < %s and
+    instance not in
+        (select distinct(instance)
+            from stacktach_instancedeletes where
+                deleted_at < %s union
+        select distinct(instance)
+            from stacktach_instancereconcile where
+                deleted_at < %s);"""
 
 OLD_RECONCILES_QUERY = """
 select stacktach_instancereconcile.id,
@@ -123,98 +118,6 @@ def _audit_launches_to_exists(launches, exists, beginning):
     return fails
 
 
-def _status_queries(exists_query):
-    verified = exists_query.filter(status=models.InstanceExists.VERIFIED)
-    reconciled = exists_query.filter(status=models.InstanceExists.RECONCILED)
-    fail = exists_query.filter(status=models.InstanceExists.FAILED)
-    pending = exists_query.filter(status=models.InstanceExists.PENDING)
-    verifying = exists_query.filter(status=models.InstanceExists.VERIFYING)
-
-    return verified, reconciled, fail, pending, verifying
-
-
-def _send_status_queries(exists_query):
-    unsent = exists_query.filter(send_status=0)
-    success = exists_query.filter(send_status__gte=200,
-                                  send_status__lt=300)
-    redirect = exists_query.filter(send_status__gte=300,
-                                   send_status__lt=400)
-    client_error = exists_query.filter(send_status__gte=400,
-                                       send_status__lt=500)
-    server_error = exists_query.filter(send_status__gte=500,
-                                       send_status__lt=600)
-    return success, unsent, redirect, client_error, server_error
-
-
-def _audit_for_exists(exists_query):
-    (verified, reconciled,
-     fail, pending, verifying) = _status_queries(exists_query)
-
-    (success, unsent, redirect,
-     client_error, server_error) = _send_status_queries(verified)
-
-    (success_rec, unsent_rec, redirect_rec,
-     client_error_rec, server_error_rec) = _send_status_queries(reconciled)
-
-    report = {
-        'count': exists_query.count(),
-        'verified': verified.count(),
-        'reconciled': reconciled.count(),
-        'failed': fail.count(),
-        'pending': pending.count(),
-        'verifying': verifying.count(),
-        'send_status': {
-            'success': success.count(),
-            'unsent': unsent.count(),
-            'redirect': redirect.count(),
-            'client_error': client_error.count(),
-            'server_error': server_error.count(),
-        },
-        'send_status_rec': {
-            'success': success_rec.count(),
-            'unsent': unsent_rec.count(),
-            'redirect': redirect_rec.count(),
-            'client_error': client_error_rec.count(),
-            'server_error': server_error_rec.count(),
-        }
-    }
-
-    return report
-
-
-def _verifier_audit_for_day(beginning, ending):
-    summary = {}
-
-    filters = {
-        'raw__when__gte': beginning,
-        'raw__when__lte': ending,
-        'audit_period_ending': F('audit_period_beginning') + (60*60*24)
-    }
-    periodic_exists = models.InstanceExists.objects.filter(**filters)
-
-    summary['periodic'] = _audit_for_exists(periodic_exists)
-
-    filters = {
-        'raw__when__gte': beginning,
-        'raw__when__lte': ending,
-        'audit_period_ending__lt': F('audit_period_beginning') + (60*60*24)
-    }
-    instant_exists = models.InstanceExists.objects.filter(**filters)
-
-    summary['instantaneous'] = _audit_for_exists(instant_exists)
-
-    filters = {
-        'raw__when__gte': beginning,
-        'raw__when__lte': ending,
-        'status': models.InstanceExists.FAILED
-    }
-    failed = models.InstanceExists.objects.filter(**filters)
-    detail = []
-    for exist in failed:
-        detail.append(['Exist', exist.id, exist.fail_reason])
-    return summary, detail
-
-
 def _launch_audit_for_period(beginning, ending):
     launches_dict = {}
     new_launches = _get_new_launches(beginning, ending)
@@ -278,7 +181,6 @@ def _launch_audit_for_period(beginning, ending):
     launch_to_exists_fails = _audit_launches_to_exists(launches_dict,
                                                        exists_dict,
                                                        beginning)
-
     return launch_to_exists_fails, new_launches.count(), len(old_launches_dict)
 
 
@@ -287,8 +189,9 @@ def audit_for_period(beginning, ending):
     ending_decimal = dt.dt_to_decimal(ending)
 
     (verify_summary,
-     verify_detail) = _verifier_audit_for_day(beginning_decimal,
-                                              ending_decimal)
+     verify_detail) = usage_audit._verifier_audit_for_day(beginning_decimal,
+                                                          ending_decimal,
+                                                          models.InstanceExists)
     detail, new_count, old_count = _launch_audit_for_period(beginning_decimal,
                                                             ending_decimal)
 
@@ -309,29 +212,6 @@ def audit_for_period(beginning, ending):
     return summary, details
 
 
-def get_previous_period(time, period_length):
-    if period_length == 'day':
-        last_period = time - datetime.timedelta(days=1)
-        start = datetime.datetime(year=last_period.year,
-                                  month=last_period.month,
-                                  day=last_period.day)
-        end = datetime.datetime(year=time.year,
-                                month=time.month,
-                                day=time.day)
-        return start, end
-    elif period_length == 'hour':
-        last_period = time - datetime.timedelta(hours=1)
-        start = datetime.datetime(year=last_period.year,
-                                  month=last_period.month,
-                                  day=last_period.day,
-                                  hour=last_period.hour)
-        end = datetime.datetime(year=time.year,
-                                month=time.month,
-                                day=time.day,
-                                hour=time.hour)
-        return start, end
-
-
 def store_results(start, end, summary, details):
     values = {
         'json': make_json_report(summary, details),
@@ -393,7 +273,7 @@ if __name__ == '__main__':
     else:
         time = datetime.datetime.utcnow()
 
-    start, end = get_previous_period(time, args.period_length)
+    start, end = usage_audit.get_previous_period(time, args.period_length)
 
     summary, details = audit_for_period(start, end)
 
diff --git a/reports/usage_audit.py b/reports/usage_audit.py
new file mode 100644
index 0000000..f3168e1
--- /dev/null
+++ b/reports/usage_audit.py
@@ -0,0 +1,107 @@
+import datetime
+from django.db.models import F
+from stacktach import models
+
+
+def _status_queries(exists_query):
+    verified = exists_query.filter(status=models.InstanceExists.VERIFIED)
+    reconciled = exists_query.filter(status=models.InstanceExists.RECONCILED)
+    fail = exists_query.filter(status=models.InstanceExists.FAILED)
+    pending = exists_query.filter(status=models.InstanceExists.PENDING)
+    verifying = exists_query.filter(status=models.InstanceExists.VERIFYING)
+
+    return verified, reconciled, fail, pending, verifying
+
+
+def _send_status_queries(exists_query):
+    unsent = exists_query.filter(send_status=0)
+    success = exists_query.filter(send_status__gte=200,
+                                  send_status__lt=300)
+    redirect = exists_query.filter(send_status__gte=300,
+                                   send_status__lt=400)
+    client_error = exists_query.filter(send_status__gte=400,
+                                       send_status__lt=500)
+    server_error = exists_query.filter(send_status__gte=500,
+                                       send_status__lt=600)
+    return success, unsent, redirect, client_error, server_error
+
+
+def _audit_for_exists(exists_query):
+    (verified, reconciled,
+     fail, pending, verifying) = _status_queries(exists_query)
+
+    (success, unsent, redirect,
+     client_error, server_error) = _send_status_queries(verified)
+
+    report = {
+        'count': exists_query.count(),
+        'verified': verified.count(),
+        'reconciled': reconciled.count(),
+        'failed': fail.count(),
+        'pending': pending.count(),
+        'verifying': verifying.count(),
+        'send_status': {
+            'success': success.count(),
+            'unsent': unsent.count(),
+            'redirect': redirect.count(),
+            'client_error': client_error.count(),
+            'server_error': server_error.count(),
+        }
+    }
+
+    return report
+
+
+def _verifier_audit_for_day(beginning, ending, exists_model):
+    summary = {}
+
+    filters = {
+        'raw__when__gte': beginning,
+        'raw__when__lte': ending,
+        'audit_period_ending': F('audit_period_beginning') + (60*60*24)
+    }
+    periodic_exists = exists_model.objects.filter(**filters)
+
+    summary['periodic'] = _audit_for_exists(periodic_exists)
+
+    filters = {
+        'raw__when__gte': beginning,
+        'raw__when__lte': ending,
+        'audit_period_ending__lt': F('audit_period_beginning') + (60*60*24)
+    }
+    instant_exists = exists_model.objects.filter(**filters)
+
+    summary['instantaneous'] = _audit_for_exists(instant_exists)
+
+    filters = {
+        'raw__when__gte': beginning,
+        'raw__when__lte': ending,
+        'status': exists_model.FAILED
+    }
+    failed = exists_model.objects.filter(**filters)
+    detail = []
+    for exist in failed:
+        detail.append(['Exist', exist.id, exist.fail_reason])
+    return summary, detail
+
+def get_previous_period(time, period_length):
+    if period_length == 'day':
+        last_period = time - datetime.timedelta(days=1)
+        start = datetime.datetime(year=last_period.year,
+                                  month=last_period.month,
+                                  day=last_period.day)
+        end = datetime.datetime(year=time.year,
+                                month=time.month,
+                                day=time.day)
+        return start, end
+    elif period_length == 'hour':
+        last_period = time - datetime.timedelta(hours=1)
+        start = datetime.datetime(year=last_period.year,
+                                  month=last_period.month,
+                                  day=last_period.day,
+                                  hour=last_period.hour)
+        end = datetime.datetime(year=time.year,
+                                month=time.month,
+                                day=time.day,
+                                hour=time.hour)
+        return start, end
\ No newline at end of file
diff --git a/tests/unit/test_auditor.py b/tests/unit/test_auditor.py
new file mode 100644
index 0000000..ebbe101
--- /dev/null
+++ b/tests/unit/test_auditor.py
@@ -0,0 +1,252 @@
+# Copyright (c) 2013 - Rackspace Inc.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+import argparse
+import datetime
+import json
+import sys
+import os
+from reports import usage_audit
+
+sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach'))
+
+from stacktach import datetime_to_decimal as dt
+from stacktach import models
+from stacktach.reconciler import Reconciler
+
+OLD_LAUNCHES_QUERY = """
+select * from stacktach_instanceusage where
+    launched_at is not null and
+    launched_at < %s and
+    instance not in
+        (select distinct(instance)
+            from stacktach_instancedeletes where
+                deleted_at < %s union
+        select distinct(instance)
+            from stacktach_instancereconcile where
+                deleted_at < %s);"""
+
+reconciler = None
+
+
+def _get_new_launches(beginning, ending):
+    filters = {
+        'launched_at__gte': beginning,
+        'launched_at__lte': ending,
+    }
+    return models.InstanceUsage.objects.filter(**filters)
+
+
+def _get_deletes(beginning, ending):
+    filters = {
+        'deleted_at__gte': beginning,
+        'deleted_at__lte': ending,
+    }
+    return models.InstanceDeletes.objects.filter(**filters)
+
+
+def _get_exists(beginning, ending):
+    filters = {
+        'audit_period_beginning': beginning,
+        'audit_period_ending__gte': beginning,
+        'audit_period_ending__lte': ending,
+    }
+    return models.InstanceExists.objects.filter(**filters)
+
+
+def _audit_launches_to_exists(launches, exists, beginning):
+    fails = []
+    for (instance, launches) in launches.items():
+        if instance in exists:
+            for expected in launches:
+                found = False
+                for actual in exists[instance]:
+                    if int(expected['launched_at']) == \
+                            int(actual['launched_at']):
+                    # HACK (apmelton): Truncate the decimal because we may not
+                    #    have the milliseconds.
+                        found = True
+
+                if not found:
+                    rec = False
+                    if reconciler:
+                        args = (expected['id'], beginning)
+                        rec = reconciler.missing_exists_for_instance(*args)
+                    msg = "Couldn't find exists for launch (%s, %s)"
+                    msg = msg % (instance, expected['launched_at'])
+                    fails.append(['Launch', expected['id'], msg, 'Y' if rec else 'N'])
+        else:
+            rec = False
+            if reconciler:
+                args = (launches[0]['id'], beginning)
+                rec = reconciler.missing_exists_for_instance(*args)
+            msg = "No exists for instance (%s)" % instance
+            fails.append(['Launch', '-', msg, 'Y' if rec else 'N'])
+    return fails
+
+
+def _launch_audit_for_period(beginning, ending):
+    launches_dict = {}
+    new_launches = _get_new_launches(beginning, ending)
+    for launch in new_launches:
+        instance = launch.instance
+        l = {'id': launch.id, 'launched_at': launch.launched_at}
+        if instance in launches_dict:
+            launches_dict[instance].append(l)
+        else:
+            launches_dict[instance] = [l, ]
+
+    # NOTE (apmelton)
+    # Django's safe substitution doesn't allow dict substitution...
+    # Thus, we send it 'beginning' three times...
+    old_launches = models.InstanceUsage.objects\
+                         .raw(OLD_LAUNCHES_QUERY,
+                              [beginning, beginning, beginning])
+
+    old_launches_dict = {}
+    for launch in old_launches:
+        instance = launch.instance
+        l = {'id': launch.id, 'launched_at': launch.launched_at}
+        if instance not in old_launches_dict or \
+                (old_launches_dict[instance]['launched_at'] <
+                 launch.launched_at):
+            old_launches_dict[instance] = l
+
+    for instance, launch in old_launches_dict.items():
+        if instance in launches_dict:
+            launches_dict[instance].append(launch)
+        else:
+            launches_dict[instance] = [launch, ]
+
+    exists_dict = {}
+    exists = _get_exists(beginning, ending)
+    for exist in exists:
+        instance = exist.instance
+        e = {'id': exist.id,
+             'launched_at': exist.launched_at,
+             'deleted_at': exist.deleted_at}
+        if instance in exists_dict:
+            exists_dict[instance].append(e)
+        else:
+            exists_dict[instance] = [e, ]
+
+    launch_to_exists_fails = _audit_launches_to_exists(launches_dict,
+                                                       exists_dict,
+                                                       beginning)
+    return launch_to_exists_fails, new_launches.count(), len(old_launches_dict)
+
+
+def audit_for_period(beginning, ending):
+    beginning_decimal = dt.dt_to_decimal(beginning)
+    ending_decimal = dt.dt_to_decimal(ending)
+
+    (verify_summary,
+     verify_detail) = usage_audit._verifier_audit_for_day(beginning_decimal,
+                                                          ending_decimal,
+                                                          models.InstanceExists)
+    detail, new_count, old_count = _launch_audit_for_period(beginning_decimal,
+                                                            ending_decimal)
+
+    summary = {
+        'verifier': verify_summary,
+        'launch_summary': {
+            'new_launches': new_count,
+            'old_launches': old_count,
+            'failures': len(detail)
+        },
+    }
+
+    details = {
+        'exist_fails': verify_detail,
+        'launch_fails': detail,
+    }
+
+    return summary, details
+
+
+def store_results(start, end, summary, details):
+    values = {
+        'json': make_json_report(summary, details),
+        'created': dt.dt_to_decimal(datetime.datetime.utcnow()),
+        'period_start': start,
+        'period_end': end,
+        'version': 4,
+        'name': 'nova usage audit'
+    }
+
+    report = models.JsonReport(**values)
+    report.save()
+
+
+def make_json_report(summary, details):
+    report = [{'summary': summary},
+              ['Object', 'ID', 'Error Description', 'Reconciled?']]
+    report.extend(details['exist_fails'])
+    report.extend(details['launch_fails'])
+    return json.dumps(report)
+
+
+def valid_datetime(d):
+    try:
+        t = datetime.datetime.strptime(d, "%Y-%m-%d %H:%M:%S")
+        return t
+    except Exception, e:
+        raise argparse.ArgumentTypeError(
+            "'%s' is not in YYYY-MM-DD HH:MM:SS format." % d)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser('StackTach Nova Usage Audit Report')
+    parser.add_argument('--period_length',
+                        choices=['hour', 'day'], default='day')
+    parser.add_argument('--utcdatetime',
+                        help="Override the end time used to generate report.",
+                        type=valid_datetime, default=None)
+    parser.add_argument('--store',
+                        help="If set to true, report will be stored. "
+                             "Otherwise, it will just be printed",
+                        type=bool, default=False)
+    parser.add_argument('--reconcile',
+                        help="Enabled reconciliation",
+                        type=bool, default=False)
+    parser.add_argument('--reconciler_config',
+                        help="Location of the reconciler config file",
+                        type=str,
+                        default='/etc/stacktach/reconciler-config.json')
+    args = parser.parse_args()
+
+    if args.reconcile:
+        with open(args.reconciler_config) as f:
+            reconciler_config = json.load(f)
+            reconciler = Reconciler(reconciler_config)
+
+    if args.utcdatetime is not None:
+        time = args.utcdatetime
+    else:
+        time = datetime.datetime.utcnow()
+
+    start, end = usage_audit.get_previous_period(time, args.period_length)
+
+    summary, details = audit_for_period(start, end)
+
+    if not args.store:
+        print make_json_report(summary, details)
+    else:
+        store_results(start, end, summary, details)

From b76e245c12629f644b0d25df6a19bdde3a2784ae Mon Sep 17 00:00:00 2001
From: Manali Latkar <manalil@thoughtworks.com>
Date: Thu, 19 Sep 2013 16:43:53 +0530
Subject: [PATCH 2/4] incorporated the review changes and optimized
 OLD_IMAGES_QUERY

---
 reports/glance_usage_audit.py | 83 ++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 41 deletions(-)

diff --git a/reports/glance_usage_audit.py b/reports/glance_usage_audit.py
index a84235b..7588d7f 100644
--- a/reports/glance_usage_audit.py
+++ b/reports/glance_usage_audit.py
@@ -10,33 +10,32 @@ from reports import usage_audit
 from stacktach import models
 from stacktach import datetime_to_decimal as dt
 
-
-OLD_LAUNCHES_QUERY = """
-select * from stacktach_imageusage where
-    created_at is not null and
-    created_at < %s and
-    uuid not in
-        (select distinct(uuid)
-            from stacktach_imagedeletes where
-                deleted_at < %s);"""
+OLD_IMAGES_QUERY = """
+select * from stacktach_imageusage left join stacktach_imagedeletes
+on (stacktach_imageusage.uuid = stacktach_imagedeletes.uuid and
+ deleted_at < %s)
+ where stacktach_imagedeletes.id IS NULL
+ and created_at is not null and created_at < %s;"""
 
 
 def audit_usages_to_exists(exists, usages):
     # checks if all exists correspond to the given usages
     fails = []
-    for (uuid, launches) in usages.items():
+    for (uuid, images) in usages.items():
         if uuid not in exists:
             msg = "No exists for usage (%s)" % uuid
-            fails.append(['Usage', launches[0]['id'], msg])
+            fails.append(['Usage', images[0]['id'], msg])
     return fails
 
-def _get_new_launches(beginning, ending):
+
+def _get_new_images(beginning, ending):
     filters = {
         'created_at__gte': beginning,
         'created_at__lte': ending,
     }
     return models.ImageUsage.objects.filter(**filters)
 
+
 def _get_exists(beginning, ending):
     filters = {
         'audit_period_beginning': beginning,
@@ -45,6 +44,7 @@ def _get_exists(beginning, ending):
     }
     return models.ImageExists.objects.filter(**filters)
 
+
 def valid_datetime(d):
     try:
         t = datetime.datetime.strptime(d, "%Y-%m-%d %H:%M:%S")
@@ -62,25 +62,26 @@ def audit_for_period(beginning, ending):
      verify_detail) = _verifier_audit_for_day(beginning_decimal,
                                                           ending_decimal,
                                                           models.ImageExists)
-    detail, new_count, old_count = _launch_audit_for_period(beginning_decimal,
+    detail, new_count, old_count = _image_audit_for_period(beginning_decimal,
                                                             ending_decimal)
 
     summary = {
         'verifier': verify_summary,
-        'launch_summary': {
-            'new_launches': new_count,
-            'old_launches': old_count,
+        'image_summary': {
+            'new_images': new_count,
+            'old_images': old_count,
             'failures': len(detail)
         },
     }
 
     details = {
         'exist_fails': verify_detail,
-        'launch_fails': detail,
+        'image_fails': detail,
     }
 
     return summary, details
 
+
 def _verifier_audit_for_day(beginning, ending, exists_model):
     summary = {}
     period = 60*60*24-0.000001
@@ -92,8 +93,8 @@ def _verifier_audit_for_day(beginning, ending, exists_model):
         'audit_period_ending': F('audit_period_beginning') + period
 
     }
-    instant_exists = exists_model.objects.filter(**filters)
-    summary['exists'] = _audit_for_exists(instant_exists)
+    exists = exists_model.objects.filter(**filters)
+    summary['exists'] = _audit_for_exists(exists)
 
     filters = {
         'raw__when__gte': beginning,
@@ -106,6 +107,7 @@ def _verifier_audit_for_day(beginning, ending, exists_model):
         detail.append(['Exist', exist.id, exist.fail_reason])
     return summary, detail
 
+
 def _audit_for_exists(exists_query):
     (verified, reconciled,
      fail, pending, verifying) = usage_audit._status_queries(exists_query)
@@ -129,29 +131,28 @@ def _audit_for_exists(exists_query):
     }
     return report
 
-def _launch_audit_for_period(beginning, ending):
-    launches_dict = {}
-    new_launches = _get_new_launches(beginning, ending)
-    for launch in new_launches:
-        uuid = launch.uuid
-        l = {'id': launch.id, 'created_at': launch.created_at}
-        if uuid in launches_dict:
-            launches_dict[uuid].append(l)
-        else:
-            launches_dict[uuid] = [l, ]
 
-    # NOTE (apmelton)
+def _image_audit_for_period(beginning, ending):
+    images_dict = {}
+    new_images = _get_new_images(beginning, ending)
+    for image in new_images:
+        uuid = image.uuid
+        l = {'id': image.id, 'created_at': image.created_at}
+        if uuid in images_dict:
+            images_dict[uuid].append(l)
+        else:
+            images_dict[uuid] = [l, ]
     # Django's safe substitution doesn't allow dict substitution...
-    # Thus, we send it 'beginning' three    times...
-    old_launches = models.ImageUsage.objects\
-                         .raw(OLD_LAUNCHES_QUERY,
+    # Thus, we pass 'beginning' twice (once per %s placeholder).
+    old_images = models.ImageUsage.objects\
+                         .raw(OLD_IMAGES_QUERY,
                               [beginning, beginning])
 
-    old_launches_dict = {}
-    for launch in old_launches:
-        uuid = launch.uuid
-        l = {'id': launch.id, 'created_at': launch.created_at}
-        old_launches_dict[uuid] = l
+    old_images_dict = {}
+    for image in old_images:
+        uuid = image.uuid
+        l = {'id': image.id, 'created_at': image.created_at}
+        old_images_dict[uuid] = l
 
     exists_dict = {}
     exists = _get_exists(beginning, ending)
@@ -165,8 +166,8 @@ def _launch_audit_for_period(beginning, ending):
         else:
             exists_dict[uuid] = [e, ]
 
-    launch_to_exists_fails = audit_usages_to_exists(exists_dict,launches_dict)
-    return launch_to_exists_fails, new_launches.count(), len(old_launches_dict)
+    image_to_exists_fails = audit_usages_to_exists(exists_dict,images_dict)
+    return image_to_exists_fails, new_images.count(), len(old_images_dict)
 
 
 def store_results(start, end, summary, details):
@@ -187,7 +188,7 @@ def make_json_report(summary, details):
     report = [{'summary': summary},
               ['Object', 'ID', 'Error Description']]
     report.extend(details['exist_fails'])
-    report.extend(details['launch_fails'])
+    report.extend(details['image_fails'])
     return json.dumps(report)
 
 

From be06dbff8c523b86d1a225539db89a3d20533e0d Mon Sep 17 00:00:00 2001
From: Manali Latkar <manalil@thoughtworks.com>
Date: Tue, 29 Oct 2013 16:57:16 +0530
Subject: [PATCH 3/4] correcting the old_launches query to the optimized one

---
 reports/nova_usage_audit.py | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/reports/nova_usage_audit.py b/reports/nova_usage_audit.py
index a4862d8..b7b94a1 100644
--- a/reports/nova_usage_audit.py
+++ b/reports/nova_usage_audit.py
@@ -33,16 +33,21 @@ from stacktach import models
 from stacktach.reconciler import Reconciler
 
 OLD_LAUNCHES_QUERY = """
-select * from stacktach_instanceusage where
-    launched_at is not null and
-    launched_at < %s and
-    instance not in
-        (select distinct(instance)
-            from stacktach_instancedeletes where
-                deleted_at < %s union
-        select distinct(instance)
-            from stacktach_instancereconcile where
-                deleted_at < %s);"""
+select stacktach_instanceusage.id,
+       stacktach_instanceusage.instance,
+       stacktach_instanceusage.launched_at from stacktach_instanceusage
+    left outer join stacktach_instancedeletes on
+        stacktach_instanceusage.instance = stacktach_instancedeletes.instance
+    left outer join stacktach_instancereconcile on
+        stacktach_instanceusage.instance = stacktach_instancereconcile.instance
+        where (
+            stacktach_instancereconcile.deleted_at is null and (
+                stacktach_instancedeletes.deleted_at is null or
+                stacktach_instancedeletes.deleted_at > %s
+            )
+            or (stacktach_instancereconcile.deleted_at is not null and
+                stacktach_instancereconcile.deleted_at > %s)
+        ) and stacktach_instanceusage.launched_at < %s;"""
 
 OLD_RECONCILES_QUERY = """
 select stacktach_instancereconcile.id,

From 1a1820a71c241edf06234b197ed2203c10b680a3 Mon Sep 17 00:00:00 2001
From: Manali Latkar <manalil@thoughtworks.com>
Date: Mon, 4 Nov 2013 11:39:26 +0530
Subject: [PATCH 4/4] removing the unwanted old file

---
 tests/unit/test_auditor.py | 252 -------------------------------------
 1 file changed, 252 deletions(-)
 delete mode 100644 tests/unit/test_auditor.py

diff --git a/tests/unit/test_auditor.py b/tests/unit/test_auditor.py
deleted file mode 100644
index ebbe101..0000000
--- a/tests/unit/test_auditor.py
+++ /dev/null
@@ -1,252 +0,0 @@
-# Copyright (c) 2013 - Rackspace Inc.
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-import argparse
-import datetime
-import json
-import sys
-import os
-from reports import usage_audit
-
-sys.path.append(os.environ.get('STACKTACH_INSTALL_DIR', '/stacktach'))
-
-from stacktach import datetime_to_decimal as dt
-from stacktach import models
-from stacktach.reconciler import Reconciler
-
-OLD_LAUNCHES_QUERY = """
-select * from stacktach_instanceusage where
-    launched_at is not null and
-    launched_at < %s and
-    instance not in
-        (select distinct(instance)
-            from stacktach_instancedeletes where
-                deleted_at < %s union
-        select distinct(instance)
-            from stacktach_instancereconcile where
-                deleted_at < %s);"""
-
-reconciler = None
-
-
-def _get_new_launches(beginning, ending):
-    filters = {
-        'launched_at__gte': beginning,
-        'launched_at__lte': ending,
-    }
-    return models.InstanceUsage.objects.filter(**filters)
-
-
-def _get_deletes(beginning, ending):
-    filters = {
-        'deleted_at__gte': beginning,
-        'deleted_at__lte': ending,
-    }
-    return models.InstanceDeletes.objects.filter(**filters)
-
-
-def _get_exists(beginning, ending):
-    filters = {
-        'audit_period_beginning': beginning,
-        'audit_period_ending__gte': beginning,
-        'audit_period_ending__lte': ending,
-    }
-    return models.InstanceExists.objects.filter(**filters)
-
-
-def _audit_launches_to_exists(launches, exists, beginning):
-    fails = []
-    for (instance, launches) in launches.items():
-        if instance in exists:
-            for expected in launches:
-                found = False
-                for actual in exists[instance]:
-                    if int(expected['launched_at']) == \
-                            int(actual['launched_at']):
-                    # HACK (apmelton): Truncate the decimal because we may not
-                    #    have the milliseconds.
-                        found = True
-
-                if not found:
-                    rec = False
-                    if reconciler:
-                        args = (expected['id'], beginning)
-                        rec = reconciler.missing_exists_for_instance(*args)
-                    msg = "Couldn't find exists for launch (%s, %s)"
-                    msg = msg % (instance, expected['launched_at'])
-                    fails.append(['Launch', expected['id'], msg, 'Y' if rec else 'N'])
-        else:
-            rec = False
-            if reconciler:
-                args = (launches[0]['id'], beginning)
-                rec = reconciler.missing_exists_for_instance(*args)
-            msg = "No exists for instance (%s)" % instance
-            fails.append(['Launch', '-', msg, 'Y' if rec else 'N'])
-    return fails
-
-
-def _launch_audit_for_period(beginning, ending):
-    launches_dict = {}
-    new_launches = _get_new_launches(beginning, ending)
-    for launch in new_launches:
-        instance = launch.instance
-        l = {'id': launch.id, 'launched_at': launch.launched_at}
-        if instance in launches_dict:
-            launches_dict[instance].append(l)
-        else:
-            launches_dict[instance] = [l, ]
-
-    # NOTE (apmelton)
-    # Django's safe substitution doesn't allow dict substitution...
-    # Thus, we send it 'beginning' three times...
-    old_launches = models.InstanceUsage.objects\
-                         .raw(OLD_LAUNCHES_QUERY,
-                              [beginning, beginning, beginning])
-
-    old_launches_dict = {}
-    for launch in old_launches:
-        instance = launch.instance
-        l = {'id': launch.id, 'launched_at': launch.launched_at}
-        if instance not in old_launches_dict or \
-                (old_launches_dict[instance]['launched_at'] <
-                 launch.launched_at):
-            old_launches_dict[instance] = l
-
-    for instance, launch in old_launches_dict.items():
-        if instance in launches_dict:
-            launches_dict[instance].append(launch)
-        else:
-            launches_dict[instance] = [launch, ]
-
-    exists_dict = {}
-    exists = _get_exists(beginning, ending)
-    for exist in exists:
-        instance = exist.instance
-        e = {'id': exist.id,
-             'launched_at': exist.launched_at,
-             'deleted_at': exist.deleted_at}
-        if instance in exists_dict:
-            exists_dict[instance].append(e)
-        else:
-            exists_dict[instance] = [e, ]
-
-    launch_to_exists_fails = _audit_launches_to_exists(launches_dict,
-                                                       exists_dict,
-                                                       beginning)
-    return launch_to_exists_fails, new_launches.count(), len(old_launches_dict)
-
-
-def audit_for_period(beginning, ending):
-    beginning_decimal = dt.dt_to_decimal(beginning)
-    ending_decimal = dt.dt_to_decimal(ending)
-
-    (verify_summary,
-     verify_detail) = usage_audit._verifier_audit_for_day(beginning_decimal,
-                                                          ending_decimal,
-                                                          models.InstanceExists)
-    detail, new_count, old_count = _launch_audit_for_period(beginning_decimal,
-                                                            ending_decimal)
-
-    summary = {
-        'verifier': verify_summary,
-        'launch_summary': {
-            'new_launches': new_count,
-            'old_launches': old_count,
-            'failures': len(detail)
-        },
-    }
-
-    details = {
-        'exist_fails': verify_detail,
-        'launch_fails': detail,
-    }
-
-    return summary, details
-
-
-def store_results(start, end, summary, details):
-    values = {
-        'json': make_json_report(summary, details),
-        'created': dt.dt_to_decimal(datetime.datetime.utcnow()),
-        'period_start': start,
-        'period_end': end,
-        'version': 4,
-        'name': 'nova usage audit'
-    }
-
-    report = models.JsonReport(**values)
-    report.save()
-
-
-def make_json_report(summary, details):
-    report = [{'summary': summary},
-              ['Object', 'ID', 'Error Description', 'Reconciled?']]
-    report.extend(details['exist_fails'])
-    report.extend(details['launch_fails'])
-    return json.dumps(report)
-
-
-def valid_datetime(d):
-    try:
-        t = datetime.datetime.strptime(d, "%Y-%m-%d %H:%M:%S")
-        return t
-    except Exception, e:
-        raise argparse.ArgumentTypeError(
-            "'%s' is not in YYYY-MM-DD HH:MM:SS format." % d)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser('StackTach Nova Usage Audit Report')
-    parser.add_argument('--period_length',
-                        choices=['hour', 'day'], default='day')
-    parser.add_argument('--utcdatetime',
-                        help="Override the end time used to generate report.",
-                        type=valid_datetime, default=None)
-    parser.add_argument('--store',
-                        help="If set to true, report will be stored. "
-                             "Otherwise, it will just be printed",
-                        type=bool, default=False)
-    parser.add_argument('--reconcile',
-                        help="Enabled reconciliation",
-                        type=bool, default=False)
-    parser.add_argument('--reconciler_config',
-                        help="Location of the reconciler config file",
-                        type=str,
-                        default='/etc/stacktach/reconciler-config.json')
-    args = parser.parse_args()
-
-    if args.reconcile:
-        with open(args.reconciler_config) as f:
-            reconciler_config = json.load(f)
-            reconciler = Reconciler(reconciler_config)
-
-    if args.utcdatetime is not None:
-        time = args.utcdatetime
-    else:
-        time = datetime.datetime.utcnow()
-
-    start, end = usage_audit.get_previous_period(time, args.period_length)
-
-    summary, details = audit_for_period(start, end)
-
-    if not args.store:
-        print make_json_report(summary, details)
-    else:
-        store_results(start, end, summary, details)