From c74bd48a48a8a87fe312ebba7da9a33387dcc262 Mon Sep 17 00:00:00 2001
From: Thomas Maddox <thomas.maddox@RACKSPACE.com>
Date: Wed, 12 Jun 2013 16:39:04 -0500
Subject: [PATCH 1/4] first checkin for os_type; grabbing from notification
 json

---
 reports/pretty.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/reports/pretty.py b/reports/pretty.py
index 7867592..81c013b 100644
--- a/reports/pretty.py
+++ b/reports/pretty.py
@@ -77,6 +77,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
             failure_type = None
 
             operation = "aux"
+            os_type = "other"
             image_type_num = 0
 
             for raw in raws:
@@ -99,6 +100,15 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
                         operation = cmd
                         break
 
+                # Brace yourself. We are now painfully digging into the
+                # notification to get the os_type attribute
+                if os_type == "other" and raw.json:
+                    notification = json.loads(raw.json)
+                    if notification[1]:
+                        os_type = notification[1].get('payload', {})\
+                            .get('image_meta', {})\
+                            .get('os_type', "other")
+
                 if raw.image_type:
                     image_type_num |= raw.image_type
 
@@ -117,7 +127,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
             if diff > too_long and failure_type == None:
                 failure_type = too_long_col
 
-            key = (operation, image)
+            key = (operation, image, os_type)
 
             # Track durations for all attempts, good and bad ...
             _durations = durations.get(key, [])
@@ -150,7 +160,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
     report.append(details)
 
     failure_types = ["4xx", "5xx", too_long_col, "state"]
-    cols = ["Operation", "Image", "Min", "Max", "Med", "%d%%" % percentile,
+    cols = ["Operation", "Image", "OS", "Min", "Max", "Med", "%d%%" % percentile,
             "Requests"]
     for failure_type in failure_types:
         cols.append("%s" % failure_type)
@@ -161,7 +171,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
     failure_totals = {}
     for key, count in attempts.iteritems():
         total += count
-        operation, image = key
+        operation, image, os_type = key
 
         breakdown = failures.get(key, {})
         this_failure_pair = []
@@ -199,7 +209,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
         _fmedian = dt.sec_to_str(_median)
         _fpercentile = dt.sec_to_str(_percentile)
 
-        row = [operation, image, _fmin, _fmax, _fmedian, _fpercentile, count]
+        row = [operation, image, os_type, _fmin, _fmax, _fmedian, _fpercentile, count]
         for failure_count, failure_percentage in this_failure_pair:
             row.append(failure_count)
             row.append(failure_percentage)
@@ -316,7 +326,7 @@ if __name__ == '__main__':
 
     for row in raw_report[2:]:
         frow = row[:]
-        for col in [8, 10, 12, 14]:
+        for col in [9, 11, 13, 15]:
             frow[col] = "%.1f%%" % (row[col] * 100.0)
         p.add_row(frow)
     print p

From 6676e85c0fe42ba52156e93b9ec907ceff30776a Mon Sep 17 00:00:00 2001
From: Thomas Maddox <thomas.maddox@RACKSPACE.com>
Date: Thu, 13 Jun 2013 12:47:34 -0500
Subject: [PATCH 2/4] Changed to use image_type bit field to determine os_type

---
 reports/pretty.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/reports/pretty.py b/reports/pretty.py
index 81c013b..851b0d7 100644
--- a/reports/pretty.py
+++ b/reports/pretty.py
@@ -77,7 +77,6 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
             failure_type = None
 
             operation = "aux"
-            os_type = "other"
             image_type_num = 0
 
             for raw in raws:
@@ -112,12 +111,20 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
                 if raw.image_type:
                     image_type_num |= raw.image_type
 
+            # Get image (base or snapshot) from image_type bit field
             image = "?"
             if image_type.isset(image_type_num, image_type.BASE_IMAGE):
                 image = "base"
             if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE):
                 image = "snap"
 
+            #Get os_type from image_type bit field
+            os_type = "other"
+            if image_type.isset(image_type_num, image_type.LINUX_IMAGE):
+                os_type = "linux"
+            if image_type.isset(image_type_num, image_type.WINDOWS_IMAGE):
+                os_type = "windows"
+
             if not start:
                 continue
 

From 8ecd7a4a819f7db79b02747c17ceeab9cefa6a7d Mon Sep 17 00:00:00 2001
From: Thomas Maddox <thomas.maddox@RACKSPACE.com>
Date: Thu, 20 Jun 2013 11:07:43 -0500
Subject: [PATCH 3/4] removed notification dive

---
 reports/pretty.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/reports/pretty.py b/reports/pretty.py
index 851b0d7..436fb48 100644
--- a/reports/pretty.py
+++ b/reports/pretty.py
@@ -99,15 +99,6 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
                         operation = cmd
                         break
 
-                # Brace yourself. We are now painfully digging into the
-                # notification to get the os_type attribute
-                if os_type == "other" and raw.json:
-                    notification = json.loads(raw.json)
-                    if notification[1]:
-                        os_type = notification[1].get('payload', {})\
-                            .get('image_meta', {})\
-                            .get('os_type', "other")
-
                 if raw.image_type:
                     image_type_num |= raw.image_type
 

From 93a07235442b455c03598c93b432f92e84b0c986 Mon Sep 17 00:00:00 2001
From: Thomas Maddox <thomas.maddox@RACKSPACE.com>
Date: Fri, 21 Jun 2013 13:08:03 -0500
Subject: [PATCH 4/4] clean up

---
 reports/error_details.py | 71 +++++++++++++++++-----------------------
 reports/pretty.py        | 24 +++++++-------
 2 files changed, 42 insertions(+), 53 deletions(-)

diff --git a/reports/error_details.py b/reports/error_details.py
index 1fd883d..a45bfd5 100644
--- a/reports/error_details.py
+++ b/reports/error_details.py
@@ -17,6 +17,16 @@ if __name__ != '__main__':
 
 # To mask unique identifiers for categorizing notifications
 def mask_msg(text):
+    # Order matters because of how precedence affects masking.
+    #
+    # Example: REQ_ID has a UUID in it, but the meaning is different
+    # in this context, so best to grab those first.
+    #
+    # LG_NUM usually represents a memory size; with the number of flavors
+    # this can create a lot of noise.
+    #
+    # The intent is to remove noise from unimportant subtleties
+
     masking_regex = (
         (1, 'REQ_ID',
          r"req-[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}"
@@ -117,14 +127,21 @@ if __name__ == '__main__':
                               day=yesterday.day)
     end = start + datetime.timedelta(hours=length-1, minutes=59, seconds=59)
 
+    deployments = {}
+
     instance_map = {}  # { uuid : [request_id, request_id, ...] }
     exception_counts = {}  # { exception_message : count }
     event_counts = {}  # { event_name : count }
-    metadata = {'report_format': 'json',
-                'instances': instance_map,
-                'exception_counts': exception_counts,
-                'event_counts': event_counts
-                }
+    tenant_issues = {}
+    codes = {}
+    metadata = {
+        'report_format': 'json',
+        'instances': instance_map,
+        'exception_counts': exception_counts,
+        'event_counts': event_counts,
+        'tenant_issues': tenant_issues,
+        'codes': codes,
+    }
 
     # Tell Stacky to format as JSON and set placeholders for various summaries
     report = [metadata]
@@ -132,8 +149,6 @@ if __name__ == '__main__':
     dstart = dt.dt_to_decimal(start)
     dend = dt.dt_to_decimal(end)
 
-    codes = {}
-    deployments = {}
     for deploy in models.Deployment.objects.all():
         deployments[deploy.id] = deploy.name
 
@@ -145,12 +160,6 @@ if __name__ == '__main__':
     expiry = 60 * 60  # 1 hour
     cmds = ['create', 'rebuild', 'rescue', 'resize', 'snapshot']
 
-    failures = {}
-    causes = {}
-    durations = {}
-    successes = {}
-    tenant_issues = {}
-
     for uuid_dict in updates:
         uuid = uuid_dict['instance']
 
@@ -224,42 +233,24 @@ if __name__ == '__main__':
             if not _start:
                 continue
 
-            image = "?"
-            if image_type.isset(image_type_num, image_type.BASE_IMAGE):
-                image = "base"
-            if image_type.isset(image_type_num, image_type.SNAPSHOT_IMAGE):
-                image = "snap"
-
             _end = _when
             diff = _end - _start
 
-            if diff > 3600 and failure_type is None:
-                failure_type = ">60"
+            if diff > 1800 and failure_type is None:
+                failure_type = ">30"
 
-            key = (operation, image_type_num, cell)
-
-            # Track durations for all attempts, good and bad ...
-            duration_min, duration_max, duration_count, duration_total = \
-                durations.get(key, (9999999, 0, 0, 0))
-            duration_min = min(duration_min, diff)
-            duration_max = max(duration_max, diff)
-            duration_count += 1
-            duration_total += diff
-            durations[key] = (duration_min, duration_max, duration_count,
-                              duration_total)
-
-            if not failure_type:
-                successes[key] = successes.get(key, 0) + 1
-            else:
+            if failure_type:
+                key = (operation, image_type_num, cell)
                 failed_request = {}
                 message = []  # For exception message masking
                 req_list.append(req)
                 instance_map[uuid] = req_list
                 failed_request['req'] = req
+                failed_request['uuid'] = uuid
+                failed_request['tenant'] = tenant
                 failed_request['duration'] = "%.2f minutes" % (diff/60)
                 failed_request['operation'] = operation
                 failed_request['platform'] = image_type.readable(image_type_num)
-                failures[key] = failures.get(key, 0) + 1
                 tenant_issues[tenant] = tenant_issues.get(tenant, 0) + 1
 
                 if err_id:
@@ -296,12 +287,12 @@ if __name__ == '__main__':
                             codes[code] = codes.get(code, 0) + 1
                             failure_type = code
                     failed_request['failure_type'] = failure_type
+
                     raws = models.RawData.objects.filter(request_id=req)\
                                          .exclude(event='compute.instance.exists')\
                                          .order_by('when')
 
                     failed_request['details'] = []
-
                     for raw in raws:
                         failure_detail = {}
                         failure_detail['host'] = raw.host
@@ -310,13 +301,11 @@ if __name__ == '__main__':
                         failure_detail['state'] = raw.state
                         failure_detail['old_task'] = raw.old_task
                         failure_detail['task'] = raw.task
+
                         failed_request['details'].append(failure_detail)
 
                     report.append(failed_request)
 
-                cause_key = (key, failure_type)
-                causes[cause_key] = causes.get(cause_key, 0) + 1
-
     # Assign values to store in DB
     values = {'json': json.dumps(report),
               'created': dt.dt_to_decimal(datetime.datetime.utcnow()),
diff --git a/reports/pretty.py b/reports/pretty.py
index 436fb48..654fc40 100644
--- a/reports/pretty.py
+++ b/reports/pretty.py
@@ -16,17 +16,16 @@ from stacktach import models
 def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
                 store=False, region=None, too_long=1800):
     if not yesterday:
-        yesterday = datetime.datetime.utcnow().date() - \
-                    datetime.timedelta(days=1)
+        yesterday = datetime.datetime.utcnow().date() -\
+            datetime.timedelta(days=1)
 
     rstart = datetime.datetime(year=yesterday.year, month=yesterday.month,
-                              day=yesterday.day, hour=start_hour)
+                               day=yesterday.day, hour=start_hour)
     rend = rstart + datetime.timedelta(hours=hours-1, minutes=59, seconds=59)
 
     dstart = dt.dt_to_decimal(rstart)
     dend = dt.dt_to_decimal(rend)
 
-    codes = {}
     too_long_col = '> %d' % (too_long / 60)
 
     cells = []
@@ -87,11 +86,12 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
                     err = raw
                     failure_type = 'http'
 
-                if raw.old_state != 'error' and raw.state == 'error':
+                if failure_type != 'state' and raw.old_state != 'error'\
+                        and raw.state == 'error':
                     failure_type = 'state'
 
                 if raw.old_state == 'error' and \
-                                (not raw.state in ['deleted', 'error']):
+                        (not raw.state in ['deleted', 'error']):
                     failure_type = None
 
                 for cmd in cmds:
@@ -110,7 +110,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
                 image = "snap"
 
             #Get os_type from image_type bit field
-            os_type = "other"
+            os_type = "?"
             if image_type.isset(image_type_num, image_type.LINUX_IMAGE):
                 os_type = "linux"
             if image_type.isset(image_type_num, image_type.WINDOWS_IMAGE):
@@ -122,7 +122,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
             end = raw.when
             diff = end - start
 
-            if diff > too_long and failure_type == None:
+            if diff > too_long and failure_type is None:
                 failure_type = too_long_col
 
             key = (operation, image, os_type)
@@ -158,7 +158,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
     report.append(details)
 
     failure_types = ["4xx", "5xx", too_long_col, "state"]
-    cols = ["Operation", "Image", "OS", "Min", "Max", "Med", "%d%%" % percentile,
+    cols = ["Operation", "Image", "OS Type", "Min", "Max", "Med", "%d%%" % percentile,
             "Requests"]
     for failure_type in failure_types:
         cols.append("%s" % failure_type)
@@ -178,7 +178,7 @@ def make_report(yesterday=None, start_hour=0, hours=24, percentile=97,
             # Sum for grand totals.
             failure_count = breakdown.get(failure_type, 0)
             failure_totals[failure_type] = \
-                         failure_totals.get(failure_type, 0) + failure_count
+                failure_totals.get(failure_type, 0) + failure_count
 
             # Failure percentage for this attempt.
             percentage = float(failure_count) / float(count)
@@ -231,9 +231,9 @@ def valid_date(date):
     try:
         t = time.strptime(date, "%Y-%m-%d")
         return datetime.datetime(*t[:6])
-    except Exception, e:
+    except Exception:
         raise argparse.ArgumentTypeError(
-                                    "'%s' is not in YYYY-MM-DD format." % date)
+            "'%s' is not in YYYY-MM-DD format." % date)
 
 
 if __name__ == '__main__':