Regenerate datasets using InnoDB stats as well.

I am also now using a cloud database to cache the output, as JSON
wasn't scaling for me.

Change-Id: I19ffba481e0db271e080f3bc62e429dbc6c673f7
Michael Still 2014-01-14 18:20:48 +11:00
parent fa30f12e0d
commit 5efa05ccc4
5 changed files with 66 additions and 11300 deletions
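
The results cache moves from results.json to a MySQL table named summary. The patch only contains the insert statement, not the schema, so the sketch below shows one possible table that would accept those rows; the column names and the config keys match the diff, but the column types and the placeholder credentials are assumptions.

    import MySQLdb

    # Placeholder credentials; the script below reads the real values from
    # config['results']['host'/'username'/'password'/'database'].
    db = MySQLdb.connect(host='localhost', user='results',
                         passwd='secret', db='results')
    cursor = db.cursor()

    # One possible shape for the summary table written to by the new code.
    # A unique key over (path, engine, dataset, migration) would give
    # "insert ignore" something to deduplicate on, but it is left out here
    # because the patch does not show the real schema.
    cursor.execute('create table if not exists summary ('
                   '  path varchar(255),'
                   '  parsed_at datetime,'
                   '  engine varchar(32),'
                   '  dataset varchar(64),'
                   '  migration varchar(64),'
                   '  duration int,'
                   '  stats_json text)')
    db.commit()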


@@ -7,4 +7,6 @@ lockfile
python-daemon
extras
GitPython>=0.3.2.RC1
sphinxcontrib-programoutput
sphinxcontrib-programoutput
mysql-python

results.json: 11256 lines changed (file diff suppressed because it is too large)


@@ -19,6 +19,7 @@ import argparse
import datetime
import json
import logging
import MySQLdb
import os
import re
import sys
@@ -56,7 +57,12 @@ def main():
auth_version=2.0)
log.info('Got connection to swift')
a = Analyser()
# Open the results database
db = MySQLdb.connect(host=config['results']['host'],
user=config['results']['username'],
passwd=config['results']['password'],
db=config['results']['database'])
cursor = db.cursor(MySQLdb.cursors.DictCursor)
# Iterate through the logs and determine timing information. This probably
# should be done in a "more cloudy" way, but this is good enough for now.
@@ -69,9 +75,26 @@ def main():
for item in items:
log.info('Processing %s' % item['name'])
a.process(connection, swift_config['container'], item['name'])
cursor.execute('select count(*) from summary where path="%s";'
% item['name'])
if cursor.rowcount == 0:
for engine, dataset, migration in process(
connection, swift_config['container'], item['name']):
if not 'duration' in migration:
continue
cursor.execute('insert ignore into summary'
'(path, parsed_at, engine, dataset, '
'migration, duration, stats_json) '
'values("%s", now(), "%s", '
'"%s", "%s", %d, "%s");'
% (item['name'], engine, dataset,
'%s->%s' % (migration['from'],
migration['to']),
migration['duration'],
migration['stats']))
cursor.execute('commit;')
a.dump()
items = connection.get_container(swift_config['container'],
marker=item['name'], limit=1000)[1]
@@ -79,53 +102,40 @@ TEST_NAME1_RE = re.compile('.*/gate-real-db-upgrade_nova_([^_]+)_([^/]*)/.*')
TEST_NAME2_RE = re.compile('.*/gate-real-db-upgrade_nova_([^_]+)/.*/(.*).log')
class Analyser(object):
def process(connection, container, name):
log = logging.getLogger(__name__)
engine_name = None
test_name = None
def __init__(self):
self.results = {}
def dump(self):
with open('results.json', 'w') as f:
f.write(json.dumps(self.results, indent=4, sort_keys=True))
def process(self, connection, container, name):
engine_name = None
test_name = None
m = TEST_NAME1_RE.match(name)
m = TEST_NAME1_RE.match(name)
if m:
engine_name = m.group(1)
test_name = m.group(2)
else:
m = TEST_NAME2_RE.match(name)
if m:
engine_name = m.group(1)
test_name = m.group(2)
else:
m = TEST_NAME2_RE.match(name)
if m:
engine_name = m.group(1)
test_name = m.group(2)
if not engine_name or not test_name:
self.log.warn('Log name %s does not match regexp' % name)
return
if not engine_name or not test_name:
log.warn('Log name %s does not match regexp' % name)
return
content = connection.get_object(container, name)[1]
with open('/tmp/logcontent', 'w') as f:
f.write(content)
content = connection.get_object(container, name)[1]
with open('/tmp/logcontent', 'w') as f:
f.write(content)
lp = handle_results.LogParser('/tmp/logcontent', None)
lp.process_log()
if not lp.migrations:
self.log.warn('Log %s contained no migrations' % name)
lp = handle_results.LogParser('/tmp/logcontent', None)
lp.process_log()
if not lp.migrations:
log.warn('Log %s contained no migrations' % name)
for migration in lp.migrations:
duration = migration['end'] - migration['start']
self.results.setdefault(engine_name, {})
self.results[engine_name].setdefault(test_name, {})
self.results[engine_name][test_name].setdefault(migration['to'],
{})
self.results[engine_name][test_name][migration['to']]\
.setdefault(duration, 0)
self.results[engine_name][test_name][migration['to']][duration] \
+= 1
for migration in lp.migrations:
if not 'start' in migration:
continue
if not 'end' in migration:
continue
yield (engine_name, test_name, migration)
if __name__ == '__main__':
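
For readability, here is the new per-log ingestion step in condensed form. This is a sketch rather than the committed code: it assumes the connection, cursor, db, swift_config and item variables and the process() generator defined in this file, and it substitutes MySQLdb parameter binding and json.dumps() for the string interpolation used in the patch.

    # Condensed sketch of the loop body in main() above: one summary row per
    # migration found in the log file, skipping migrations with no duration.
    for engine, dataset, migration in process(connection,
                                              swift_config['container'],
                                              item['name']):
        if 'duration' not in migration:
            continue
        cursor.execute('insert ignore into summary '
                       '(path, parsed_at, engine, dataset, migration, '
                       'duration, stats_json) '
                       'values (%s, now(), %s, %s, %s, %s, %s)',
                       (item['name'], engine, dataset,
                        '%s->%s' % (migration['from'], migration['to']),
                        migration['duration'],
                        json.dumps(migration['stats'])))
    db.commit()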


@@ -123,6 +123,11 @@ class LogParser(object):
elif MIGRATION_START_RE.search(line):
if current_migration:
current_migration['stats'] = migration_stats
if (('start' in current_migration and
'end' in current_migration)):
current_migration['duration'] = (
current_migration['end'] -
current_migration['start'])
self.migrations.append(current_migration)
current_migration = {}
migration_stats = {}
@@ -176,6 +181,10 @@ class LogParser(object):
if current_migration:
current_migration['stats'] = migration_stats
if (('start' in current_migration and
'end' in current_migration)):
current_migration['duration'] = (
current_migration['end'] - current_migration['start'])
self.migrations.append(current_migration)
def line_to_time(self, line):


@@ -147,9 +147,10 @@ class Runner(object):
self.messages.append(warn)
for migration in lp.migrations:
duration = migration['end'] - migration['start']
if not (handle_results.check_migration(
migration, 'maximum_migration_times', duration,
migration,
'maximum_migration_times',
migration['duration'],
dataset['config'])):
self.success = False
self.messages.append('WARNING - Migration %s took too long'