Handle the new format for migrations from the LogParser.

This patch just converts the world to the new format for the
migrations found by LogParser. I've also renamed the migration time
check method to something more generic, as I am about to use it to
test other attributes of migrations.
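
For context, a minimal sketch (not part of the patch) of the old and new
migration formats and of how the renamed check is driven. The helper
mirrors check_migration below, but the sample values and config here are
invented:

    # Illustrative only: values and the config dict are made up.
    def check_migration(migration, attribute, value, dataset_config):
        # Look up the per-migration limit, falling back to the default.
        migration_number = str(migration['to'])
        allowed = dataset_config[attribute].get(
            migration_number, dataset_config[attribute]['default'])
        return value <= allowed

    old_style = (142, 1385156565, 1385156685)   # (to, start, end) tuple
    new_style = {'from': 141, 'to': 142,        # new dict format, with the
                 'start': 1385156565,           # Innodb counter deltas for
                 'end': 1385156685,             # the step under 'stats'
                 'stats': {'Innodb_rows_read': 524288}}

    dataset_config = {'maximum_migration_times': {'default': 60, '142': 240}}
    duration = new_style['end'] - new_style['start']          # 120 seconds
    assert check_migration(new_style, 'maximum_migration_times',
                           duration, dataset_config)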

Change-Id: Ibf11f3ea0932c1a05caa060ceaa48308b8b5e2e5
Michael Still 2014-01-13 21:17:32 +11:00
parent e7399f6b2e
commit fa30f12e0d
4 changed files with 72 additions and 56 deletions


@@ -24,30 +24,26 @@ TESTS_DIR = os.path.join(os.path.dirname(__file__), '../..')

 class TestHandleResults(testtools.TestCase):
     def test_line_to_time(self):
         test_line = '2013-11-22 21:42:45,908 [output] 141 -> 142... '
-        result = handle_results.line_to_time(test_line)
+        logfile = os.path.join(TESTS_DIR, 'assets/logcontent')
+        lp = handle_results.LogParser(logfile, None)
+        result = lp.line_to_time(test_line)
         self.assertEqual(result, 1385156565)

-    def test_migration_time_passes(self):
+    def test_check_migration(self):
         with open(os.path.join(TESTS_DIR,
                   'datasets/some_dataset_example/config.json'),
                   'r') as config_stream:
             dataset_config = json.load(config_stream)

-        migration_start_time = 1385116665.0
-        migration_end_time = 1385116865.0
+        duration = 120

-        migration_number = '151'
-        result = handle_results.migration_time_passes(migration_number,
-                                                      migration_start_time,
-                                                      migration_end_time,
-                                                      dataset_config)
+        result = handle_results.check_migration({'to': '151'},
+                                                'maximum_migration_times',
+                                                duration, dataset_config)
         self.assertFalse(result)

-        migration_number = '152'
-        result = handle_results.migration_time_passes(migration_number,
-                                                      migration_start_time,
-                                                      migration_end_time,
-                                                      dataset_config)
+        result = handle_results.check_migration({'to': '152'},
+                                                'maximum_migration_times',
+                                                duration, dataset_config)
         self.assertTrue(result)

     def test_check_log_for_errors(self):
@@ -84,7 +80,7 @@ class TestHandleResults(testtools.TestCase):
         migrations = []
         for migration in lp.migrations:
-            migrations.append(migration[0])
+            migrations.append(migration['to'])

         for migration in range(134, 229):
             self.assertTrue(migration in migrations,


@@ -117,13 +117,15 @@ class Analyser(object):
                 self.log.warn('Log %s contained no migrations' % name)

             for migration in lp.migrations:
-                duration = migration[2] - migration[1]
+                duration = migration['end'] - migration['start']
                 self.results.setdefault(engine_name, {})
                 self.results[engine_name].setdefault(test_name, {})
-                self.results[engine_name][test_name].setdefault(migration[0], {})
-                self.results[engine_name][test_name][migration[0]]\
+                self.results[engine_name][test_name].setdefault(migration['to'],
+                                                                {})
+                self.results[engine_name][test_name][migration['to']]\
                     .setdefault(duration, 0)
-                self.results[engine_name][test_name][migration[0]][duration] += 1
+                self.results[engine_name][test_name][migration['to']][duration] \
+                    += 1


 if __name__ == '__main__':


@@ -82,12 +82,19 @@ MIGRATION_NUMBER_RE = re.compile('^([0-9]+).*\.py$')
 MIGRATION_START_RE = re.compile('.* ([0-9]+) -\> ([0-9]+)\.\.\..*$')
 MIGRATION_END_RE = re.compile('done$')
 MIGRATION_FINAL_SCHEMA_RE = re.compile('Final schema version is ([0-9]+)')
+INNODB_STATISTIC_RE = re.compile('.* (Innodb_.*)\t([0-9]+)')


 class LogParser(object):
     def __init__(self, logpath, gitpath):
         self.logpath = logpath
         self.gitpath = gitpath
+        self._reset()
+
+    def _reset(self):
+        self.errors = []
+        self.warnings = []
+        self.migrations = []

     def find_schemas(self):
         """Return a list of the schema numbers present in git."""
@@ -98,9 +105,10 @@ class LogParser(object):
     def process_log(self):
         """Analyse a log for errors."""
-        self.errors = []
-        self.warnings = []
-        self.migrations = []
+        self._reset()

+        innodb_stats = {}
+        migration_stats = {}
+        current_migration = {}
         with open(self.logpath, 'r') as fd:
             migration_started = False
@@ -113,6 +121,12 @@ class LogParser(object):
                 elif 'ImportError' in line:
                     return False, "FAILURE - Could not import required module."
                 elif MIGRATION_START_RE.search(line):
+                    if current_migration:
+                        current_migration['stats'] = migration_stats
+                        self.migrations.append(current_migration)
+                        current_migration = {}
+                        migration_stats = {}
+
                     if migration_started:
                         # We didn't see the last one finish,
                         # something must have failed
@@ -120,22 +134,32 @@ class LogParser(object):
                                            'but did not end')

                     migration_started = True
-                    migration_start_time = line_to_time(line)
+                    current_migration['start'] = self.line_to_time(line)

                     m = MIGRATION_START_RE.match(line)
-                    migration_number_from = int(m.group(1))
-                    migration_number_to = int(m.group(2))
+                    current_migration['from'] = int(m.group(1))
+                    current_migration['to'] = int(m.group(2))

                 elif MIGRATION_END_RE.search(line):
                     if migration_started:
                         # We found the end to this migration
                         migration_started = False
-                        if migration_number_to > migration_number_from:
-                            migration_end_time = line_to_time(line)
-                            data = (migration_number_to,
-                                    migration_start_time,
-                                    migration_end_time)
-                            self.migrations.append(data)
+                        current_migration['end'] = self.line_to_time(line)

+                elif INNODB_STATISTIC_RE.search(line):
+                    # NOTE(mikal): the stats for a migration step come after
+                    # the migration has ended, because they're the next
+                    # command in the script. We don't record them until the
+                    # next migration starts (or we hit the end of the file).
+                    m = INNODB_STATISTIC_RE.match(line)
+                    name = m.group(1)
+                    value = int(m.group(2))
+
+                    if name in innodb_stats:
+                        delta = value - innodb_stats[name]
+                        if delta > 0:
+                            migration_stats[name] = delta
+                    innodb_stats[name] = value

                 elif 'Final schema version is' in line and self.gitpath:
                     # Check the final version is as expected
@@ -150,30 +174,24 @@ class LogParser(object):
                 self.errors.append('FAILURE - Did not find the end of a '
                                    'migration after a start')

+        if current_migration:
+            current_migration['stats'] = migration_stats
+            self.migrations.append(current_migration)
+
-def line_to_time(line):
-    """Extract a timestamp from a log line"""
-    return calendar.timegm(time.strptime(line[:23], '%Y-%m-%d %H:%M:%S,%f'))
+    def line_to_time(self, line):
+        """Extract a timestamp from a log line"""
+        return calendar.timegm(time.strptime(line[:23],
+                                             '%Y-%m-%d %H:%M:%S,%f'))


-def migration_time_passes(migration_number, migration_start_time,
-                          migration_end_time, dataset_config):
-    """Determines if the difference between the migration_start_time and
-    migration_end_time is acceptable.
-
-    The dataset configuration should specify a default maximum time and any
-    migration specific times in the maximum_migration_times dictionary.
+def check_migration(migration, attribute, value, dataset_config):
+    """Checks if a given migration is within its allowed parameters.

     Returns True if okay, False if it takes too long."""
-    migration_number = str(migration_number)
-    if migration_number in dataset_config['maximum_migration_times']:
-        allowed_time = \
-            dataset_config['maximum_migration_times'][migration_number]
-    else:
-        allowed_time = dataset_config['maximum_migration_times']['default']
-
-    if (migration_end_time - migration_start_time) > allowed_time:
+    migration_number = str(migration['to'])
+    allowed = dataset_config[attribute].get(
+        migration_number, dataset_config[attribute]['default'])
+    if value > allowed:
         return False
     return True


@@ -147,10 +147,10 @@ class Runner(object):
                 self.messages.append(warn)

             for migration in lp.migrations:
-                if not handle_results.migration_time_passes(migration[0],
-                                                            migration[1],
-                                                            migration[2],
-                                                            dataset['config']):
+                duration = migration['end'] - migration['start']
+                if not (handle_results.check_migration(
+                        migration, 'maximum_migration_times', duration,
+                        dataset['config'])):
                     self.success = False
                     self.messages.append('WARNING - Migration %s took too long'
                                          % migration[0])
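
A standalone sketch of the new Innodb bookkeeping described in the NOTE
in the parser above: the Innodb_* status counters MySQL reports are
cumulative, so the parser remembers the last absolute value it saw and
credits only the positive delta to the migration that just ran. The
counter values here are invented:

    innodb_stats = {}       # last absolute value seen per counter
    migration_stats = {}    # deltas credited to the current migration

    def record(name, value):
        # Only the growth since the previous sample belongs to the
        # migration step that just finished.
        if name in innodb_stats:
            delta = value - innodb_stats[name]
            if delta > 0:
                migration_stats[name] = delta
        innodb_stats[name] = value

    record('Innodb_rows_read', 1000)   # baseline before the migration
    record('Innodb_rows_read', 5000)   # sample after the migration step
    assert migration_stats == {'Innodb_rows_read': 4000}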