From ddb659db6e76667fefb54e5a4a4f9c7bcb4f08ce Mon Sep 17 00:00:00 2001 From: Ilya Shakhat Date: Tue, 6 May 2014 16:31:37 +0400 Subject: [PATCH] Fixed processing of module groups from default_data Closes bug 1314558 Change-Id: If29deca08942f8987a1176cacdd57d0c15a248fd --- .../processor/default_data_processor.py | 29 +++++++++---------- stackalytics/processor/main.py | 21 ++------------ stackalytics/processor/utils.py | 7 +++++ tests/unit/test_default_data_processor.py | 6 ++-- 4 files changed, 26 insertions(+), 37 deletions(-) diff --git a/stackalytics/processor/default_data_processor.py b/stackalytics/processor/default_data_processor.py index d76ed4698..79212e0d2 100644 --- a/stackalytics/processor/default_data_processor.py +++ b/stackalytics/processor/default_data_processor.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +import collections import hashlib import json @@ -74,14 +75,10 @@ def _retrieve_project_list_from_github(project_sources): return repos -def _create_module_groups(project_sources, repos): - organizations = {} +def _create_module_groups_for_project_sources(project_sources, repos): + organizations = collections.defaultdict(list) for repo in repos: - ogn = repo['organization'] - if ogn in organizations: - organizations[ogn].append(repo['module']) - else: - organizations[ogn] = [repo['module']] + organizations[repo['organization']].append(repo['module']) ps_organizations = dict([(ps.get('organization'), ps.get('module_group_name') or @@ -90,12 +87,9 @@ def _create_module_groups(project_sources, repos): module_groups = [] for ogn, modules in six.iteritems(organizations): - if ogn in ps_organizations: - module_group_name = ps_organizations[ogn] - else: - module_group_name = ogn - module_groups.append({'module_group_name': module_group_name, - 'modules': modules, 'tag': 'organization'}) + module_groups.append(utils.make_module_group( + ogn, name=ps_organizations.get(ogn, ogn), modules=modules, + tag='organization')) return module_groups @@ -109,7 +103,7 @@ def _update_project_list(default_data): default_data['repos'] += [r for r in repos if r['uri'] not in configured_repos] - default_data['module_groups'] += _create_module_groups( + default_data['module_groups'] += _create_module_groups_for_project_sources( default_data['project_sources'], default_data['repos']) @@ -138,8 +132,11 @@ def _store_companies(runtime_storage_inst, companies): def _store_module_groups(runtime_storage_inst, module_groups): stored_mg = runtime_storage_inst.get_by_key('module_groups') or {} for mg in module_groups: - mg['id'] = mg['module_group_name'] - stored_mg[mg['id']] = mg + name = mg['module_group_name'] + module_group_id = mg.get('id') or name + stored_mg[module_group_id] = utils.make_module_group( + module_group_id, name=name, modules=mg['modules'], + tag=mg.get('tag', 'group')) runtime_storage_inst.set_by_key('module_groups', stored_mg) diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py index a23aebf77..69b39a61f 100644 --- a/stackalytics/processor/main.py +++ b/stackalytics/processor/main.py @@ -187,13 +187,6 @@ def apply_corrections(uri, runtime_storage_inst): runtime_storage_inst.apply_corrections(valid_corrections) -def _make_module_group(group_id, name, modules, tag=None): - module_group = {'id': group_id, 'module_group_name': name, - 'modules': modules, 'tag': tag} - LOG.debug('New module group: %s', module_group) - return module_group - - def _read_official_programs_yaml(program_list_uri, release_names): LOG.debug('Process list of programs from uri: %s', program_list_uri) content = yaml.safe_load(utils.read_uri(program_list_uri)) @@ -259,20 +252,10 @@ def process_program_list(runtime_storage_inst, program_list_uri): repos = runtime_storage_inst.get_by_key('repos') or [] for repo in repos: module = repo['module'] - module_groups[module] = { - 'id': module, - 'module_group_name': module, - 'modules': [module], - 'tag': 'module' - } + module_groups[module] = utils.make_module_group(module, tag='module') # register module 'unknown' - used for emails not mapped to any module - module_groups['unknown'] = { - 'id': 'unknown', - 'module_group_name': 'unknown', - 'modules': ['unknown'], - 'tag': 'module' - } + module_groups['unknown'] = utils.make_module_group('unknown', tag='module') runtime_storage_inst.set_by_key('module_groups', module_groups) diff --git a/stackalytics/processor/utils.py b/stackalytics/processor/utils.py index 9d86a81ed..16032e366 100644 --- a/stackalytics/processor/utils.py +++ b/stackalytics/processor/utils.py @@ -182,3 +182,10 @@ def add_index(sequence, start=1, item_filter=lambda x: True): def safe_encode(s): return parse.quote_plus(s.encode('utf-8')) + + +def make_module_group(module_group_id, name=None, modules=None, tag='module'): + return {'id': module_group_id, + 'module_group_name': name or module_group_id, + 'modules': modules or [module_group_id], + 'tag': tag} diff --git a/tests/unit/test_default_data_processor.py b/tests/unit/test_default_data_processor.py index 884776676..a40b387d3 100644 --- a/tests/unit/test_default_data_processor.py +++ b/tests/unit/test_default_data_processor.py @@ -81,9 +81,11 @@ class TestDefaultDataProcessor(testtools.TestCase): self.assertIn('tux', set([r['module'] for r in dd['repos']])) self.assertEqual(2, len(dd['module_groups'])) - self.assertIn({'module_group_name': 'OpenStack', + self.assertIn({'id': 'openstack', + 'module_group_name': 'OpenStack', 'modules': ['qa', 'nova'], 'tag': 'organization'}, dd['module_groups']) - self.assertIn({'module_group_name': 'stackforge', + self.assertIn({'id': 'stackforge', + 'module_group_name': 'stackforge', 'modules': ['tux'], 'tag': 'organization'}, dd['module_groups'])