From 7323b684b2895c26725ca37cf3f804ec84025b65 Mon Sep 17 00:00:00 2001 From: Ilya Shakhat Date: Fri, 5 Dec 2014 18:28:08 +0300 Subject: [PATCH] Make project list retrieval configurable The list of projects can now be retrieved from both Gerrit and GitHub. Change-Id: I4743dc618eff3a69e5d9291cb5c4b680c89dd3cf --- etc/default_data.json | 24 ++++- etc/default_data.schema.json | 14 ++- etc/test_default_data.json | 2 +- requirements-py3.txt | 1 + requirements.txt | 1 + .../processor/default_data_processor.py | 102 ++++++++++++------ stackalytics/processor/main.py | 2 - tests/unit/test_default_data_processor.py | 6 +- 8 files changed, 112 insertions(+), 40 deletions(-) diff --git a/etc/default_data.json b/etc/default_data.json index 2f90906e7..e154644cd 100644 --- a/etc/default_data.json +++ b/etc/default_data.json @@ -10263,6 +10263,10 @@ }, { "organization": "stackforge" + }, + { + "organization": "cloudfoundry", + "uri": "github://" } ], "module_groups": [ @@ -10402,7 +10406,25 @@ { "id": "complimentary", "title": "Complimentary", - "modules": ["docker", "kubernetes", "jcloud"] + "modules": ["docker", "kubernetes", "jclouds", "cloudfoundry"] + }, + { + "id": "cloudfoundry-group", + "title": "CloudFoundary", + "child": true, + "modules": ["cloudfoundry"] + }, + { + "id": "docker-group", + "title": "Docker", + "child": true, + "modules": ["docker"] + }, + { + "id": "kubernetes-group", + "title": "kubernetes", + "child": true, + "modules": ["kubernetes"] } ] } diff --git a/etc/default_data.schema.json b/etc/default_data.schema.json index 5b635cdcb..489f621e2 100644 --- a/etc/default_data.schema.json +++ b/etc/default_data.schema.json @@ -146,9 +146,17 @@ "organization": { "type": "string" }, - "module_group_name": { - "type": "string", - "pattern": "^[\\w-]+$" + "uri": { + "type": "string" + }, + "git_base_uri": { + "type": "string" + }, + "ssh_key_filename": { + "type": "string" + }, + "ssh_username": { + "type": "string" }, "exclude": { "type": "array", diff --git 
a/etc/test_default_data.json b/etc/test_default_data.json index ff43197dd..d370f3668 100644 --- a/etc/test_default_data.json +++ b/etc/test_default_data.json @@ -109,7 +109,7 @@ "project_sources": [ { "organization": "openstack-ops", - "module_group_name": "OpenStack-Ops" + "uri": "gerrit://review.openstack.org" } ], diff --git a/requirements-py3.txt b/requirements-py3.txt index cc39b1514..418f5033e 100644 --- a/requirements-py3.txt +++ b/requirements-py3.txt @@ -12,6 +12,7 @@ oslo.serialization>=1.0.0 # Apache-2.0 oslo.utils>=1.0.0 # Apache-2.0 paramiko>=1.13.0 psutil>=1.1.1,<2.0.0 +PyGithub python3-memcached>=1.48 PyYAML>=3.1.0 sh diff --git a/requirements.txt b/requirements.txt index 17fb049eb..f8788bb0c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,6 +12,7 @@ oslo.serialization>=1.0.0 # Apache-2.0 oslo.utils>=1.0.0 # Apache-2.0 paramiko>=1.13.0 psutil>=1.1.1,<2.0.0 +PyGithub python-memcached>=1.48 PyYAML>=3.1.0 sh diff --git a/stackalytics/processor/default_data_processor.py b/stackalytics/processor/default_data_processor.py index f8033e4ce..49ad2ca37 100644 --- a/stackalytics/processor/default_data_processor.py +++ b/stackalytics/processor/default_data_processor.py @@ -17,15 +17,21 @@ import collections import hashlib import json +from github import MainClass +from oslo.config import cfg +import re import six from stackalytics.openstack.common import log as logging from stackalytics.processor import normalizer +from stackalytics.processor import rcs from stackalytics.processor import user_processor from stackalytics.processor import utils LOG = logging.getLogger(__name__) +GITHUB_URI_PREFIX = r'^github:\/\/' + def _check_default_data_change(runtime_storage_inst, default_data): h = hashlib.new('sha1') @@ -42,39 +48,76 @@ def _check_default_data_change(runtime_storage_inst, default_data): return True -def _retrieve_project_list_from_gerrit(project_sources, git_base_uri, gerrit): +def _retrieve_project_list_from_sources(project_sources): + for 
project_source in project_sources: + uri = project_source.get('uri') or cfg.CONF.review_uri + repo_iterator = [] + if re.search(rcs.GERRIT_URI_PREFIX, uri): + repo_iterator = _retrieve_project_list_from_gerrit(project_source) + elif re.search(GITHUB_URI_PREFIX, uri): + repo_iterator = _retrieve_project_list_from_github(project_source) + + exclude = set(project_source.get('exclude', [])) + for repo in repo_iterator: + if repo['module'] not in exclude: + yield repo + + +def _retrieve_project_list_from_gerrit(project_source): LOG.info('Retrieving project list from Gerrit') try: + gerrit = rcs.Gerrit(None, project_source['uri']) + key_filename = (project_source.get('ssh_key_filename') or + cfg.CONF.ssh_key_filename) + username = project_source.get('ssh_username') or cfg.CONF.ssh_username + gerrit.setup(key_filename=key_filename, username=username) + project_list = gerrit.get_project_list() except Exception as e: LOG.exception(e) LOG.warn('Fail to retrieve list of projects. Keep it unmodified') - return False + return - repos = [] - for project_source in project_sources: - organization = project_source['organization'] - LOG.debug('Get list of projects for organization %s', organization) - git_repos = [ - f for f in project_list if f.startswith(organization + "/")] + organization = project_source['organization'] + LOG.debug('Get list of projects for organization %s', organization) + git_repos = [f for f in project_list if f.startswith(organization + "/")] - exclude = set(project_source.get('exclude', [])) + git_base_uri = project_source.get('git_base_uri') or cfg.CONF.git_base_uri - for repo in git_repos: - (org, name) = repo.split('/') - if name not in exclude: - url = '%(git_base_uri)s/%(repo)s.git' % dict( - git_base_uri=git_base_uri, repo=repo) - r = { - 'branches': ['master'], - 'module': name, - 'organization': org, - 'uri': url, - 'releases': [] - } - repos.append(r) - LOG.debug('Project is added to default data: %s', r) - return repos + for repo in git_repos: + 
(org, name) = repo.split('/') + repo_uri = '%(git_base_uri)s/%(repo)s.git' % dict( + git_base_uri=git_base_uri, repo=repo) + yield { + 'branches': ['master'], + 'module': name, + 'organization': org, + 'uri': repo_uri, + 'releases': [] + } + + +def _retrieve_project_list_from_github(project_source): + LOG.info('Retrieving project list from GitHub') + github = MainClass.Github(timeout=60) + + organization = project_source['organization'] + LOG.debug('Get list of projects for organization %s', organization) + try: + github_repos = github.get_organization(organization).get_repos() + except Exception as e: + LOG.exception(e) + LOG.warn('Fail to retrieve list of projects. Keep it unmodified') + return + + for repo in github_repos: + yield { + 'branches': ['master'], + 'module': repo.name.lower(), + 'organization': organization, + 'uri': repo.git_url, + 'releases': [] + } def _create_module_groups_for_project_sources(project_sources, repos): @@ -96,12 +139,12 @@ def _create_module_groups_for_project_sources(project_sources, repos): return module_groups -def _update_project_list(default_data, git_base_uri, gerrit): +def _update_project_list(default_data): configured_repos = set([r['uri'] for r in default_data['repos']]) - repos = _retrieve_project_list_from_gerrit( - default_data['project_sources'], git_base_uri, gerrit) + repos = _retrieve_project_list_from_sources( + default_data['project_sources']) if repos: default_data['repos'] += [r for r in repos if r['uri'] not in configured_repos] @@ -198,12 +241,11 @@ def _store_default_data(runtime_storage_inst, default_data): runtime_storage_inst.set_by_key(key, value) -def process(runtime_storage_inst, default_data, - git_base_uri, gerrit, driverlog_data_uri): +def process(runtime_storage_inst, default_data, driverlog_data_uri): LOG.debug('Process default data') if 'project_sources' in default_data: - _update_project_list(default_data, git_base_uri, gerrit) + _update_project_list(default_data) 
_update_with_driverlog_data(default_data, driverlog_data_uri) diff --git a/stackalytics/processor/main.py b/stackalytics/processor/main.py index b55597674..ca7045ab2 100644 --- a/stackalytics/processor/main.py +++ b/stackalytics/processor/main.py @@ -333,8 +333,6 @@ def main(): default_data_processor.process(runtime_storage_inst, default_data, - cfg.CONF.git_base_uri, - gerrit, cfg.CONF.driverlog_data_uri) process_program_list(runtime_storage_inst, cfg.CONF.program_list_uri) diff --git a/tests/unit/test_default_data_processor.py b/tests/unit/test_default_data_processor.py index 0af1453b1..021c30724 100644 --- a/tests/unit/test_default_data_processor.py +++ b/tests/unit/test_default_data_processor.py @@ -72,11 +72,11 @@ class TestDefaultDataProcessor(testtools.TestCase): 'organization': 'stackforge'}, ], 'project_sources': [{'organization': 'openstack', - 'module_group_name': 'OpenStack'}], + 'uri': 'gerrit://'}], 'module_groups': [], } - default_data_processor._update_project_list(dd, None, None) + default_data_processor._update_project_list(dd) self.assertEqual(3, len(dd['repos'])) self.assertIn('qa', set([r['module'] for r in dd['repos']])) @@ -85,7 +85,7 @@ class TestDefaultDataProcessor(testtools.TestCase): self.assertEqual(2, len(dd['module_groups'])) self.assertIn({'id': 'openstack', - 'module_group_name': 'OpenStack', + 'module_group_name': 'openstack', 'modules': ['qa', 'nova'], 'tag': 'organization'}, dd['module_groups']) self.assertIn({'id': 'stackforge',