From fa30f12e0dcab63921726d1262a4a412f5a4979f Mon Sep 17 00:00:00 2001 From: Michael Still Date: Mon, 13 Jan 2014 21:17:32 +1100 Subject: [PATCH] Handle the new format for migrations from the LogParser. This patch just converts the world to the new format for the migrations found by LogParser. I've also moved the migration time check method to a more generic name, as I am about to use it to test other attributes of migrations. Change-Id: Ibf11f3ea0932c1a05caa060ceaa48308b8b5e2e5 --- .../test_handle_results.py | 28 +++---- turbo_hipster/cmd/analyse_historical.py | 10 ++- .../gate_real_db_upgrade/handle_results.py | 82 +++++++++++-------- .../task_plugins/gate_real_db_upgrade/task.py | 8 +- 4 files changed, 72 insertions(+), 56 deletions(-) diff --git a/tests/task_plugins/gate_real_db_upgrade/test_handle_results.py b/tests/task_plugins/gate_real_db_upgrade/test_handle_results.py index eeb27b6..2526aa2 100644 --- a/tests/task_plugins/gate_real_db_upgrade/test_handle_results.py +++ b/tests/task_plugins/gate_real_db_upgrade/test_handle_results.py @@ -24,30 +24,26 @@ TESTS_DIR = os.path.join(os.path.dirname(__file__), '../..') class TestHandleResults(testtools.TestCase): def test_line_to_time(self): test_line = '2013-11-22 21:42:45,908 [output] 141 -> 142... 
' - result = handle_results.line_to_time(test_line) + logfile = os.path.join(TESTS_DIR, 'assets/logcontent') + lp = handle_results.LogParser(logfile, None) + result = lp.line_to_time(test_line) self.assertEqual(result, 1385156565) - def test_migration_time_passes(self): + def test_check_migration(self): with open(os.path.join(TESTS_DIR, 'datasets/some_dataset_example/config.json'), 'r') as config_stream: dataset_config = json.load(config_stream) + duration = 120 - migration_start_time = 1385116665.0 - migration_end_time = 1385116865.0 - - migration_number = '151' - result = handle_results.migration_time_passes(migration_number, - migration_start_time, - migration_end_time, - dataset_config) + result = handle_results.check_migration({'to': '151'}, + 'maximum_migration_times', + duration, dataset_config) self.assertFalse(result) - migration_number = '152' - result = handle_results.migration_time_passes(migration_number, - migration_start_time, - migration_end_time, - dataset_config) + result = handle_results.check_migration({'to': '152'}, + 'maximum_migration_times', + duration, dataset_config) self.assertTrue(result) def test_check_log_for_errors(self): @@ -84,7 +80,7 @@ class TestHandleResults(testtools.TestCase): migrations = [] for migration in lp.migrations: - migrations.append(migration[0]) + migrations.append(migration['to']) for migration in range(134, 229): self.assertTrue(migration in migrations, diff --git a/turbo_hipster/cmd/analyse_historical.py b/turbo_hipster/cmd/analyse_historical.py index 09002aa..d643c88 100644 --- a/turbo_hipster/cmd/analyse_historical.py +++ b/turbo_hipster/cmd/analyse_historical.py @@ -117,13 +117,15 @@ class Analyser(object): self.log.warn('Log %s contained no migrations' % name) for migration in lp.migrations: - duration = migration[2] - migration[1] + duration = migration['end'] - migration['start'] self.results.setdefault(engine_name, {}) self.results[engine_name].setdefault(test_name, {}) - 
self.results[engine_name][test_name].setdefault(migration[0], {}) - self.results[engine_name][test_name][migration[0]]\ + self.results[engine_name][test_name].setdefault(migration['to'], + {}) + self.results[engine_name][test_name][migration['to']]\ .setdefault(duration, 0) - self.results[engine_name][test_name][migration[0]][duration] += 1 + self.results[engine_name][test_name][migration['to']][duration] \ + += 1 if __name__ == '__main__': diff --git a/turbo_hipster/task_plugins/gate_real_db_upgrade/handle_results.py b/turbo_hipster/task_plugins/gate_real_db_upgrade/handle_results.py index 927811b..dd415de 100644 --- a/turbo_hipster/task_plugins/gate_real_db_upgrade/handle_results.py +++ b/turbo_hipster/task_plugins/gate_real_db_upgrade/handle_results.py @@ -82,12 +82,19 @@ MIGRATION_NUMBER_RE = re.compile('^([0-9]+).*\.py$') MIGRATION_START_RE = re.compile('.* ([0-9]+) -\> ([0-9]+)\.\.\..*$') MIGRATION_END_RE = re.compile('done$') MIGRATION_FINAL_SCHEMA_RE = re.compile('Final schema version is ([0-9]+)') +INNODB_STATISTIC_RE = re.compile('.* (Innodb_.*)\t([0-9]+)') class LogParser(object): def __init__(self, logpath, gitpath): self.logpath = logpath self.gitpath = gitpath + self._reset() + + def _reset(self): + self.errors = [] + self.warnings = [] + self.migrations = [] def find_schemas(self): """Return a list of the schema numbers present in git.""" @@ -98,9 +105,10 @@ class LogParser(object): def process_log(self): """Analyse a log for errors.""" - self.errors = [] - self.warnings = [] - self.migrations = [] + self._reset() + innodb_stats = {} + migration_stats = {} + current_migration = {} with open(self.logpath, 'r') as fd: migration_started = False @@ -113,6 +121,12 @@ class LogParser(object): elif 'ImportError' in line: return False, "FAILURE - Could not import required module." 
elif MIGRATION_START_RE.search(line): + if current_migration: + current_migration['stats'] = migration_stats + self.migrations.append(current_migration) + current_migration = {} + migration_stats = {} + if migration_started: # We didn't see the last one finish, # something must have failed @@ -120,22 +134,32 @@ class LogParser(object): 'but did not end') migration_started = True - migration_start_time = line_to_time(line) + current_migration['start'] = self.line_to_time(line) m = MIGRATION_START_RE.match(line) - migration_number_from = int(m.group(1)) - migration_number_to = int(m.group(2)) + current_migration['from'] = int(m.group(1)) + current_migration['to'] = int(m.group(2)) elif MIGRATION_END_RE.search(line): if migration_started: - # We found the end to this migration migration_started = False - if migration_number_to > migration_number_from: - migration_end_time = line_to_time(line) - data = (migration_number_to, - migration_start_time, - migration_end_time) - self.migrations.append(data) + current_migration['end'] = self.line_to_time(line) + + elif INNODB_STATISTIC_RE.search(line): + # NOTE(mikal): the stats for a migration step come after + # the migration has ended, because they're the next + # command in the script. We don't record them until the + # next migration starts (or we hit the end of the file). 
+ m = INNODB_STATISTIC_RE.match(line) + name = m.group(1) + value = int(m.group(2)) + + if name in innodb_stats: + delta = value - innodb_stats[name] + if delta > 0: + migration_stats[name] = delta + + innodb_stats[name] = value elif 'Final schema version is' in line and self.gitpath: # Check the final version is as expected @@ -150,30 +174,24 @@ class LogParser(object): self.errors.append('FAILURE - Did not find the end of a ' 'migration after a start') + if current_migration: + current_migration['stats'] = migration_stats + self.migrations.append(current_migration) -def line_to_time(line): - """Extract a timestamp from a log line""" - return calendar.timegm(time.strptime(line[:23], '%Y-%m-%d %H:%M:%S,%f')) + def line_to_time(self, line): + """Extract a timestamp from a log line""" + return calendar.timegm(time.strptime(line[:23], + '%Y-%m-%d %H:%M:%S,%f')) -def migration_time_passes(migration_number, migration_start_time, - migration_end_time, dataset_config): - """Determines if the difference between the migration_start_time and - migration_end_time is acceptable. - - The dataset configuration should specify a default maximum time and any - migration specific times in the maximum_migration_times dictionary. +def check_migration(migration, attribute, value, dataset_config): + """Checks if a given migration is within its allowed parameters. 
 Returns True if okay, False if it takes too long.""" - migration_number = str(migration_number) - if migration_number in dataset_config['maximum_migration_times']: - allowed_time = \ - dataset_config['maximum_migration_times'][migration_number] - else: - allowed_time = dataset_config['maximum_migration_times']['default'] - - if (migration_end_time - migration_start_time) > allowed_time: + migration_number = str(migration['to']) + allowed = dataset_config[attribute].get( + migration_number, dataset_config[attribute]['default']) + if value > allowed: return False - return True diff --git a/turbo_hipster/task_plugins/gate_real_db_upgrade/task.py b/turbo_hipster/task_plugins/gate_real_db_upgrade/task.py index fb6d7d5..867c500 100644 --- a/turbo_hipster/task_plugins/gate_real_db_upgrade/task.py +++ b/turbo_hipster/task_plugins/gate_real_db_upgrade/task.py @@ -147,10 +147,10 @@ class Runner(object): self.messages.append(warn) for migration in lp.migrations: - if not handle_results.migration_time_passes(migration[0], - migration[1], - migration[2], - dataset['config']): + duration = migration['end'] - migration['start'] + if not (handle_results.check_migration( + migration, 'maximum_migration_times', duration, + dataset['config'])): self.success = False self.messages.append('WARNING - Migration %s took too long' - % migration[0]) + % migration['to'])